Reading Prudential Life Insurance Data

Quick snapshot of the data, followed by separating the variables by type

names(trainingData)
  [1] "Id"                  "Product_Info_1"      "Product_Info_2"      "Product_Info_3"     
  [5] "Product_Info_4"      "Product_Info_5"      "Product_Info_6"      "Product_Info_7"     
  [9] "Ins_Age"             "Ht"                  "Wt"                  "BMI"                
 [13] "Employment_Info_1"   "Employment_Info_2"   "Employment_Info_3"   "Employment_Info_4"  
 [17] "Employment_Info_5"   "Employment_Info_6"   "InsuredInfo_1"       "InsuredInfo_2"      
 [21] "InsuredInfo_3"       "InsuredInfo_4"       "InsuredInfo_5"       "InsuredInfo_6"      
 [25] "InsuredInfo_7"       "Insurance_History_1" "Insurance_History_2" "Insurance_History_3"
 [29] "Insurance_History_4" "Insurance_History_5" "Insurance_History_7" "Insurance_History_8"
 [33] "Insurance_History_9" "Family_Hist_1"       "Family_Hist_2"       "Family_Hist_3"      
 [37] "Family_Hist_4"       "Family_Hist_5"       "Medical_History_1"   "Medical_History_2"  
 [41] "Medical_History_3"   "Medical_History_4"   "Medical_History_5"   "Medical_History_6"  
 [45] "Medical_History_7"   "Medical_History_8"   "Medical_History_9"   "Medical_History_10" 
 [49] "Medical_History_11"  "Medical_History_12"  "Medical_History_13"  "Medical_History_14" 
 [53] "Medical_History_15"  "Medical_History_16"  "Medical_History_17"  "Medical_History_18" 
 [57] "Medical_History_19"  "Medical_History_20"  "Medical_History_21"  "Medical_History_22" 
 [61] "Medical_History_23"  "Medical_History_24"  "Medical_History_25"  "Medical_History_26" 
 [65] "Medical_History_27"  "Medical_History_28"  "Medical_History_29"  "Medical_History_30" 
 [69] "Medical_History_31"  "Medical_History_32"  "Medical_History_33"  "Medical_History_34" 
 [73] "Medical_History_35"  "Medical_History_36"  "Medical_History_37"  "Medical_History_38" 
 [77] "Medical_History_39"  "Medical_History_40"  "Medical_History_41"  "Medical_Keyword_1"  
 [81] "Medical_Keyword_2"   "Medical_Keyword_3"   "Medical_Keyword_4"   "Medical_Keyword_5"  
 [85] "Medical_Keyword_6"   "Medical_Keyword_7"   "Medical_Keyword_8"   "Medical_Keyword_9"  
 [89] "Medical_Keyword_10"  "Medical_Keyword_11"  "Medical_Keyword_12"  "Medical_Keyword_13" 
 [93] "Medical_Keyword_14"  "Medical_Keyword_15"  "Medical_Keyword_16"  "Medical_Keyword_17" 
 [97] "Medical_Keyword_18"  "Medical_Keyword_19"  "Medical_Keyword_20"  "Medical_Keyword_21" 
[101] "Medical_Keyword_22"  "Medical_Keyword_23"  "Medical_Keyword_24"  "Medical_Keyword_25" 
[105] "Medical_Keyword_26"  "Medical_Keyword_27"  "Medical_Keyword_28"  "Medical_Keyword_29" 
[109] "Medical_Keyword_30"  "Medical_Keyword_31"  "Medical_Keyword_32"  "Medical_Keyword_33" 
[113] "Medical_Keyword_34"  "Medical_Keyword_35"  "Medical_Keyword_36"  "Medical_Keyword_37" 
[117] "Medical_Keyword_38"  "Medical_Keyword_39"  "Medical_Keyword_40"  "Medical_Keyword_41" 
[121] "Medical_Keyword_42"  "Medical_Keyword_43"  "Medical_Keyword_44"  "Medical_Keyword_45" 
[125] "Medical_Keyword_46"  "Medical_Keyword_47"  "Medical_Keyword_48"  "Response"           
head(trainingData)
head(testData)
# Column-name groups used to split the data set by variable type. Each vector
# lists the columns treated as categorical, continuous, or discrete in the
# summaries that follow.

# Categorical columns (61 names, including the character column
# Product_Info_2).
# NOTE(review): Medical_History_10 is kept in this group to preserve existing
# behaviour, but its summary (almost entirely missing, values ranging 0-240)
# resembles the discrete Medical_History_* columns listed further down --
# confirm against the competition's data dictionary before modelling.
categoricalVarNames <- c(
  paste0("Product_Info_", c(1:3, 5:7)),
  paste0("Employment_Info_", c(2, 3, 5)),
  paste0("InsuredInfo_", 1:7),
  paste0("Insurance_History_", c(1:4, 7:9)),
  "Family_Hist_1",
  paste0("Medical_History_", c(2:14, 16:23, 25:31, 33:41))
)

# Continuous columns (13 names).
continuousVarNames <- c(
  "Product_Info_4",
  "Ins_Age",
  "Ht",
  "Wt",
  "BMI",
  paste0("Employment_Info_", c(1, 4, 6)),
  "Insurance_History_5",
  paste0("Family_Hist_", 2:5)
)

# Discrete columns (52 names): four Medical_History_* columns plus the 48
# Medical_Keyword_* columns.
discreteVarNames <- c(
  paste0("Medical_History_", c(1, 15, 24, 32)),
  paste0("Medical_Keyword_", 1:48)
)
# Split the training and test sets into one subset per variable type.
# Each name vector is wrapped in all_of() so that it is treated explicitly as
# an external character vector of column names: this avoids the ambiguity (and
# the tidyselect deprecation warning) of passing a bare variable to select(),
# and still raises an error if any listed column is absent from the data.
train_categorical <- trainingData %>% select(all_of(categoricalVarNames))
test_categorical <- testData %>% select(all_of(categoricalVarNames))

train_continuous <- trainingData %>% select(all_of(continuousVarNames))
test_continuous <- testData %>% select(all_of(continuousVarNames))

train_discrete <- trainingData %>% select(all_of(discreteVarNames))
test_discrete <- testData %>% select(all_of(discreteVarNames))

Let’s take a look at the categorical variables

skim(train_categorical)
Skim summary statistics
 n obs: 59381 
 n variables: 61 

-- Variable type:character -----------------------------------------------------
       variable missing complete     n min max empty n_unique
 Product_Info_2       0    59381 59381   2   2     0       19

-- Variable type:numeric -------------------------------------------------------
            variable missing complete     n   mean      sd p0 p25 p50 p75 p100     hist
   Employment_Info_2       0    59381 59381   8.64   4.23   1   9   9   9   38 <U+2582><U+2587><U+2583><U+2581><U+2581><U+2581><U+2581><U+2581>
   Employment_Info_3       0    59381 59381   1.3    0.72   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
   Employment_Info_5       0    59381 59381   2.14   0.35   2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
       Family_Hist_1       0    59381 59381   2.69   0.48   1   2   3   3    3 <U+2581><U+2581><U+2581><U+2583><U+2581><U+2581><U+2581><U+2587>
 Insurance_History_1       0    59381 59381   1.73   0.45   1   1   2   2    2 <U+2583><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
 Insurance_History_2       0    59381 59381   1.06   0.33   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Insurance_History_3       0    59381 59381   2.15   0.99   1   1   3   3    3 <U+2586><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
 Insurance_History_4       0    59381 59381   1.96   0.95   1   1   2   3    3 <U+2587><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2587>
 Insurance_History_7       0    59381 59381   1.9    0.97   1   1   1   3    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2586>
 Insurance_History_8       0    59381 59381   2.05   0.76   1   1   2   3    3 <U+2585><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2586>
 Insurance_History_9       0    59381 59381   2.42   0.51   1   2   2   3    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2586>
       InsuredInfo_1       0    59381 59381   1.21   0.42   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2581>
       InsuredInfo_2       0    59381 59381   2.01   0.086  2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
       InsuredInfo_3       0    59381 59381   5.84   2.67   1   3   6   8   11 <U+2582><U+2587><U+2581><U+2586><U+2581><U+2587><U+2581><U+2582>
       InsuredInfo_4       0    59381 59381   2.88   0.32   2   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
       InsuredInfo_5       0    59381 59381   1.03   0.23   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
       InsuredInfo_6       0    59381 59381   1.41   0.49   1   1   1   2    2 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2586>
       InsuredInfo_7       0    59381 59381   1.04   0.27   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_10   58824      557 59381 141.12 107.76   0   8 229 240  240 <U+2585><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_11       0    59381 59381   2.99   0.095  1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_12       0    59381 59381   2.06   0.23   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
  Medical_History_13       0    59381 59381   2.77   0.64   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_14       0    59381 59381   2.97   0.2    1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_16       0    59381 59381   1.33   0.74   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
  Medical_History_17       0    59381 59381   2.98   0.15   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_18       0    59381 59381   1.05   0.23   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_19       0    59381 59381   1.03   0.18   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
   Medical_History_2       0    59381 59381 253.99 178.62   1 112 162 418  648 <U+2582><U+2587><U+2581><U+2581><U+2582><U+2581><U+2582><U+2581>
  Medical_History_20       0    59381 59381   1.99   0.12   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
  Medical_History_21       0    59381 59381   1.11   0.31   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_22       0    59381 59381   1.98   0.13   1   2   2   2    2 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_23       0    59381 59381   2.53   0.85   1   3   3   3    3 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_25       0    59381 59381   1.19   0.41   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2581>
  Medical_History_26       0    59381 59381   2.81   0.39   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2587>
  Medical_History_27       0    59381 59381   2.98   0.2    1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_28       0    59381 59381   1.07   0.25   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_29       0    59381 59381   2.54   0.84   1   3   3   3    3 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
   Medical_History_3       0    59381 59381   2.1    0.3    1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
  Medical_History_30       0    59381 59381   2.04   0.2    1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
  Medical_History_31       0    59381 59381   2.99   0.17   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_33       0    59381 59381   2.8    0.59   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_34       0    59381 59381   2.69   0.72   1   3   3   3    3 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_35       0    59381 59381   1      0.064  1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_36       0    59381 59381   2.18   0.41   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2582>
  Medical_History_37       0    59381 59381   1.94   0.24   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
  Medical_History_38       0    59381 59381   1      0.069  1   1   1   1    2 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_39       0    59381 59381   2.83   0.56   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
   Medical_History_4       0    59381 59381   1.65   0.48   1   1   2   2    2 <U+2585><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_40       0    59381 59381   2.97   0.25   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_41       0    59381 59381   1.64   0.93   1   1   1   3    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2583>
   Medical_History_5       0    59381 59381   1.01   0.086  1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
   Medical_History_6       0    59381 59381   2.89   0.46   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
   Medical_History_7       0    59381 59381   2.01   0.17   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
   Medical_History_8       0    59381 59381   2.04   0.29   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
   Medical_History_9       0    59381 59381   1.77   0.42   1   2   2   2    3 <U+2582><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
      Product_Info_1       0    59381 59381   1.03   0.16   1   1   1   1    2 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
      Product_Info_3       0    59381 59381  24.42   5.07   1  26  26  26   38 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2587><U+2581><U+2581>
      Product_Info_5       0    59381 59381   2.01   0.083  2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
      Product_Info_6       0    59381 59381   2.67   0.74   1   3   3   3    3 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
      Product_Info_7       0    59381 59381   1.04   0.29   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
skim(test_categorical)
Skim summary statistics
 n obs: 19765 
 n variables: 61 

-- Variable type:character -----------------------------------------------------
       variable missing complete     n min max empty n_unique
 Product_Info_2       0    19765 19765   2   2     0       19

-- Variable type:numeric -------------------------------------------------------
            variable missing complete     n   mean      sd p0 p25 p50 p75 p100     hist
   Employment_Info_2       0    19765 19765   8.44   4.19   1   9   9   9   37 <U+2582><U+2587><U+2583><U+2581><U+2581><U+2581><U+2581><U+2581>
   Employment_Info_3       0    19765 19765   1.33   0.74   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
   Employment_Info_5       0    19765 19765   2.15   0.36   2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
       Family_Hist_1       0    19765 19765   2.69   0.48   1   2   3   3    3 <U+2581><U+2581><U+2581><U+2583><U+2581><U+2581><U+2581><U+2587>
 Insurance_History_1       0    19765 19765   1.71   0.46   1   1   2   2    2 <U+2583><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
 Insurance_History_2       0    19765 19765   1.05   0.31   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Insurance_History_3       0    19765 19765   2.18   0.98   1   1   3   3    3 <U+2586><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
 Insurance_History_4       0    19765 19765   1.92   0.95   1   1   2   3    3 <U+2587><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2586>
 Insurance_History_7       0    19765 19765   1.89   0.96   1   1   1   3    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2586>
 Insurance_History_8       0    19765 19765   2.04   0.77   1   1   2   3    3 <U+2586><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2586>
 Insurance_History_9       0    19765 19765   2.4    0.51   1   2   2   3    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2586>
       InsuredInfo_1       0    19765 19765   1.2    0.41   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2581>
       InsuredInfo_2       0    19765 19765   2.01   0.086  2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
       InsuredInfo_3       0    19765 19765   5.7    2.98   1   3   8   8   11 <U+2583><U+2585><U+2581><U+2581><U+2581><U+2587><U+2581><U+2581>
       InsuredInfo_4       0    19765 19765   2.89   0.31   2   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
       InsuredInfo_5       0    19765 19765   1.03   0.24   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
       InsuredInfo_6       0    19765 19765   1.42   0.49   1   1   1   2    2 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2586>
       InsuredInfo_7       0    19765 19765   1.03   0.26   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_10   19564      201 19765 149.82 102.92   0  22 223 240  240 <U+2585><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_11       0    19765 19765   2.99   0.092  1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_12       0    19765 19765   2.05   0.23   2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_13       0    19765 19765   2.77   0.64   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_14       0    19765 19765   2.97   0.19   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_16       0    19765 19765   1.32   0.74   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
  Medical_History_17       0    19765 19765   2.98   0.15   2   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_18       0    19765 19765   1.05   0.23   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_19       0    19765 19765   1.03   0.18   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
   Medical_History_2       0    19765 19765 249.79 177.94   1 112 162 407  647 <U+2582><U+2587><U+2581><U+2581><U+2582><U+2581><U+2582><U+2581>
  Medical_History_20       0    19765 19765   1.99   0.12   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
  Medical_History_21       0    19765 19765   1.11   0.31   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_22       0    19765 19765   1.98   0.12   1   2   2   2    2 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_23       0    19765 19765   2.49   0.87   1   1   3   3    3 <U+2583><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_25       0    19765 19765   1.2    0.41   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2581>
  Medical_History_26       0    19765 19765   2.81   0.4    1   3   3   3    3 <U+2581><U+2581><U+2581><U+2582><U+2581><U+2581><U+2581><U+2587>
  Medical_History_27       0    19765 19765   2.98   0.19   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_28       0    19765 19765   1.07   0.25   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_29       0    19765 19765   2.56   0.83   1   3   3   3    3 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
   Medical_History_3       0    19765 19765   2.15   0.35   2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
  Medical_History_30       0    19765 19765   2.04   0.2    1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
  Medical_History_31       0    19765 19765   2.99   0.17   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_33       0    19765 19765   2.8    0.6    1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_34       0    19765 19765   2.7    0.71   1   3   3   3    3 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_35       0    19765 19765   1      0.069  1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_36       0    19765 19765   2.18   0.41   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2582>
  Medical_History_37       0    19765 19765   1.94   0.25   1   2   2   2    2 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_38       0    19765 19765   1.01   0.072  1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_History_39       0    19765 19765   2.83   0.56   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
   Medical_History_4       0    19765 19765   1.66   0.47   1   1   2   2    2 <U+2585><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_40       0    19765 19765   2.97   0.25   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
  Medical_History_41       0    19765 19765   1.65   0.94   1   1   1   3    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2583>
   Medical_History_5       0    19765 19765   1.01   0.086  1   1   1   1    2 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
   Medical_History_6       0    19765 19765   2.89   0.46   1   3   3   3    3 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
   Medical_History_7       0    19765 19765   2.01   0.17   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
   Medical_History_8       0    19765 19765   2.04   0.29   1   2   2   2    3 <U+2581><U+2581><U+2581><U+2587><U+2581><U+2581><U+2581><U+2581>
   Medical_History_9       0    19765 19765   1.78   0.42   1   2   2   2    2 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
      Product_Info_1       0    19765 19765   1.02   0.16   1   1   1   1    2 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
      Product_Info_3       0    19765 19765  24.32   5.18   2  26  26  26   37 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2587><U+2581><U+2581>
      Product_Info_5       0    19765 19765   2.01   0.081  2   2   2   2    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
      Product_Info_6       0    19765 19765   2.66   0.75   1   3   3   3    3 <U+2582><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
      Product_Info_7       0    19765 19765   1.04   0.29   1   1   1   1    3 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>

Let’s explore continuous variables

skim(train_continuous)
Skim summary statistics
 n obs: 59381 
 n variables: 13 

-- Variable type:numeric -------------------------------------------------------
            variable missing complete     n   mean     sd p0       p25     p50   p75 p100
                 BMI       0    59381 59381 0.47   0.12    0     0.39  0.45    0.53  1   
   Employment_Info_1      19    59362 59381 0.078  0.082   0     0.035 0.06    0.1   1   
   Employment_Info_4    6779    52602 59381 0.0063 0.033   0     0     0       0     1   
   Employment_Info_6   10854    48527 59381 0.36   0.35    0     0.06  0.25    0.55  1   
       Family_Hist_2   28656    30725 59381 0.47   0.15    0     0.36  0.46    0.58  1   
       Family_Hist_3   34241    25140 59381 0.5    0.14    0     0.4   0.52    0.6   1   
       Family_Hist_4   19184    40197 59381 0.44   0.16    0     0.32  0.42    0.56  0.94
       Family_Hist_5   41811    17570 59381 0.48   0.13    0     0.4   0.51    0.58  1   
                  Ht       0    59381 59381 0.71   0.074   0     0.65  0.71    0.76  1   
             Ins_Age       0    59381 59381 0.41   0.2     0     0.24  0.4     0.57  1   
 Insurance_History_5   25396    33985 59381 0.0017 0.0073  0 4e-04     0.00097 0.002 1   
      Product_Info_4       0    59381 59381 0.33   0.28    0     0.077 0.23    0.49  1   
                  Wt       0    59381 59381 0.29   0.089   0     0.23  0.29    0.35  1   
     hist
 <U+2581><U+2581><U+2583><U+2587><U+2585><U+2582><U+2581><U+2581>
 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 <U+2587><U+2583><U+2582><U+2583><U+2581><U+2581><U+2581><U+2583>
 <U+2581><U+2582><U+2585><U+2587><U+2586><U+2583><U+2581><U+2581>
 <U+2581><U+2581><U+2583><U+2586><U+2587><U+2585><U+2581><U+2581>
 <U+2581><U+2582><U+2587><U+2587><U+2586><U+2585><U+2582><U+2581>
 <U+2581><U+2581><U+2583><U+2586><U+2587><U+2582><U+2581><U+2581>
 <U+2581><U+2581><U+2581><U+2581><U+2582><U+2587><U+2585><U+2581>
 <U+2583><U+2587><U+2587><U+2587><U+2587><U+2586><U+2581><U+2581>
 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 <U+2587><U+2587><U+2582><U+2585><U+2581><U+2581><U+2581><U+2582>
 <U+2581><U+2586><U+2587><U+2582><U+2581><U+2581><U+2581><U+2581>
skim(test_continuous)
Skim summary statistics
 n obs: 19765 
 n variables: 13 

-- Variable type:numeric -------------------------------------------------------
            variable missing complete     n   mean     sd    p0     p25     p50   p75 p100
                 BMI       0    19765 19765 0.47   0.12   0.097 0.38    0.45    0.53  1   
   Employment_Info_1       3    19762 19765 0.079  0.084  0     0.035   0.06    0.1   1   
   Employment_Info_4    2137    17628 19765 0.0065 0.035  0     0       0       0     1   
   Employment_Info_6    3787    15978 19765 0.37   0.35   0     0.055   0.25    0.6   1   
       Family_Hist_2    9880     9885 19765 0.47   0.16   0.043 0.36    0.46    0.58  0.94
       Family_Hist_3   11064     8701 19765 0.5    0.14   0     0.41    0.52    0.61  0.89
       Family_Hist_4    6677    13088 19765 0.45   0.16   0     0.32    0.44    0.56  1   
       Family_Hist_5   13624     6141 19765 0.49   0.13   0.027 0.42    0.52    0.59  0.85
                  Ht       0    19765 19765 0.71   0.074  0.35  0.65    0.71    0.76  1   
             Ins_Age       0    19765 19765 0.41   0.2    0     0.25    0.42    0.58  0.97
 Insurance_History_5    8105    11660 19765 0.0016 0.0026 0     0.00037 0.00083 0.002 0.13
      Product_Info_4       0    19765 19765 0.32   0.28   0     0.077   0.23    0.49  1   
                  Wt       0    19765 19765 0.29   0.09   0.084 0.23    0.29    0.35  0.88
     hist
 <U+2581><U+2582><U+2587><U+2587><U+2583><U+2581><U+2581><U+2581>
 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 <U+2587><U+2583><U+2582><U+2583><U+2581><U+2581><U+2581><U+2583>
 <U+2581><U+2582><U+2586><U+2587><U+2585><U+2583><U+2582><U+2581>
 <U+2581><U+2581><U+2582><U+2585><U+2587><U+2587><U+2582><U+2581>
 <U+2581><U+2583><U+2587><U+2587><U+2586><U+2583><U+2581><U+2581>
 <U+2581><U+2581><U+2582><U+2585><U+2586><U+2587><U+2582><U+2581>
 <U+2581><U+2581><U+2581><U+2585><U+2587><U+2585><U+2581><U+2581>
 <U+2583><U+2587><U+2587><U+2587><U+2587><U+2587><U+2582><U+2581>
 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 <U+2587><U+2587><U+2582><U+2585><U+2581><U+2581><U+2581><U+2582>
 <U+2582><U+2587><U+2587><U+2582><U+2581><U+2581><U+2581><U+2581>

Let’s explore discrete variables

skim(train_discrete)
Skim summary statistics
 n obs: 59381 
 n variables: 52 

-- Variable type:numeric -------------------------------------------------------
           variable missing complete     n     mean     sd p0 p25 p50 p75 p100     hist
  Medical_History_1    8889    50492 59381   7.96   13.03   0   2   4   9  240 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_History_15   44596    14785 59381 123.76   98.52   0  17 117 240  240 <U+2587><U+2582><U+2582><U+2582><U+2581><U+2581><U+2581><U+2587>
 Medical_History_24   55580     3801 59381  50.64   78.15   0   1   8  64  240 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_History_32   58274     1107 59381  11.97   38.72   0   0   0   2  240 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_1       0    59381 59381   0.042   0.2    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_10       0    59381 59381   0.036   0.19   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_11       0    59381 59381   0.058   0.23   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_12       0    59381 59381   0.01    0.1    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_13       0    59381 59381   0.006   0.077  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_14       0    59381 59381   0.0078  0.088  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_15       0    59381 59381   0.19    0.39   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
 Medical_Keyword_16       0    59381 59381   0.013   0.11   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_17       0    59381 59381   0.0092  0.095  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_18       0    59381 59381   0.0075  0.086  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_19       0    59381 59381   0.0093  0.096  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_2       0    59381 59381   0.0089  0.094  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_20       0    59381 59381   0.0081  0.09   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_21       0    59381 59381   0.015   0.12   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_22       0    59381 59381   0.037   0.19   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_23       0    59381 59381   0.098   0.3    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_24       0    59381 59381   0.019   0.14   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_25       0    59381 59381   0.089   0.29   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_26       0    59381 59381   0.013   0.12   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_27       0    59381 59381   0.012   0.11   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_28       0    59381 59381   0.015   0.12   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_29       0    59381 59381   0.012   0.11   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_3       0    59381 59381   0.049   0.22   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_30       0    59381 59381   0.025   0.16   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_31       0    59381 59381   0.011   0.1    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_32       0    59381 59381   0.021   0.14   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_33       0    59381 59381   0.023   0.15   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_34       0    59381 59381   0.021   0.14   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_35       0    59381 59381   0.0069  0.083  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_36       0    59381 59381   0.01    0.1    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_37       0    59381 59381   0.067   0.25   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_38       0    59381 59381   0.0068  0.082  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_39       0    59381 59381   0.014   0.12   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_4       0    59381 59381   0.015   0.12   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_40       0    59381 59381   0.057   0.23   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_41       0    59381 59381   0.01    0.1    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_42       0    59381 59381   0.046   0.21   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_43       0    59381 59381   0.011   0.1    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_44       0    59381 59381   0.0075  0.086  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_45       0    59381 59381   0.014   0.12   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_46       0    59381 59381   0.0085  0.092  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_47       0    59381 59381   0.02    0.14   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_48       0    59381 59381   0.054   0.23   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_5       0    59381 59381   0.0086  0.092  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_6       0    59381 59381   0.013   0.11   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_7       0    59381 59381   0.014   0.12   0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_8       0    59381 59381   0.01    0.1    0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_9       0    59381 59381   0.0067  0.081  0   0   0   0    1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
# Print skim() summary statistics (missing counts, quantiles, histogram) for test_discrete.
skim(test_discrete)
Skim summary statistics
 n obs: 19765 
 n variables: 52 

-- Variable type:numeric -------------------------------------------------------
           variable missing complete     n     mean     sd p0 p25 p50    p75 p100     hist
  Medical_History_1    2972    16793 19765   7.83   12.56   0   2   4   9     235 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_History_15   14864     4901 19765 125.69   99.12   0  19 119 240     240 <U+2586><U+2582><U+2582><U+2582><U+2581><U+2581><U+2581><U+2587>
 Medical_History_24   18585     1180 19765  49.85   77.21   0   1   9  60.25  240 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_History_32   19414      351 19765  10.94   36.81   0   0   0   1.5   240 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_1       0    19765 19765   0.043   0.2    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_10       0    19765 19765   0.037   0.19   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_11       0    19765 19765   0.061   0.24   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_12       0    19765 19765   0.0077  0.088  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_13       0    19765 19765   0.0079  0.089  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_14       0    19765 19765   0.0085  0.092  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_15       0    19765 19765   0.2     0.4    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2582>
 Medical_Keyword_16       0    19765 19765   0.011   0.11   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_17       0    19765 19765   0.01    0.1    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_18       0    19765 19765   0.007   0.083  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_19       0    19765 19765   0.0086  0.092  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_2       0    19765 19765   0.0077  0.087  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_20       0    19765 19765   0.0075  0.086  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_21       0    19765 19765   0.015   0.12   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_22       0    19765 19765   0.039   0.19   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_23       0    19765 19765   0.1     0.3    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_24       0    19765 19765   0.019   0.14   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_25       0    19765 19765   0.1     0.31   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_26       0    19765 19765   0.013   0.11   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_27       0    19765 19765   0.011   0.1    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_28       0    19765 19765   0.015   0.12   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_29       0    19765 19765   0.011   0.1    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_3       0    19765 19765   0.053   0.22   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_30       0    19765 19765   0.024   0.15   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_31       0    19765 19765   0.011   0.1    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_32       0    19765 19765   0.02    0.14   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_33       0    19765 19765   0.024   0.15   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_34       0    19765 19765   0.019   0.14   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_35       0    19765 19765   0.0067  0.081  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_36       0    19765 19765   0.011   0.11   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_37       0    19765 19765   0.066   0.25   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_38       0    19765 19765   0.007   0.083  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_39       0    19765 19765   0.014   0.12   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_4       0    19765 19765   0.014   0.12   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_40       0    19765 19765   0.057   0.23   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_41       0    19765 19765   0.011   0.1    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_42       0    19765 19765   0.045   0.21   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_43       0    19765 19765   0.01    0.1    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_44       0    19765 19765   0.0082  0.09   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_45       0    19765 19765   0.014   0.12   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_46       0    19765 19765   0.0086  0.092  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_47       0    19765 19765   0.018   0.13   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
 Medical_Keyword_48       0    19765 19765   0.056   0.23   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_5       0    19765 19765   0.0079  0.089  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_6       0    19765 19765   0.013   0.11   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_7       0    19765 19765   0.012   0.11   0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_8       0    19765 19765   0.01    0.1    0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>
  Medical_Keyword_9       0    19765 19765   0.0076  0.087  0   0   0   0       1 <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581>

The skim summaries above show that some data are missing. What fraction of all values is missing in the training and test datasets, and how are the missing values spread across the columns?

# Fraction of all cells in the training data that are NA.
mean(is.na(trainingData))
[1] 0.05171885
# Fraction of all cells in the test data that are NA.
mean(is.na(testData))
[1] 0.05205894
# Number of missing values in each column of the training data.
# colSums(is.na()) works on the logical NA mask directly, whereas apply()
# first coerces the whole data frame to a single-type matrix.
colSums(is.na(trainingData))
                 Id      Product_Info_1      Product_Info_2      Product_Info_3 
                  0                   0                   0                   0 
     Product_Info_4      Product_Info_5      Product_Info_6      Product_Info_7 
                  0                   0                   0                   0 
            Ins_Age                  Ht                  Wt                 BMI 
                  0                   0                   0                   0 
  Employment_Info_1   Employment_Info_2   Employment_Info_3   Employment_Info_4 
                 19                   0                   0                6779 
  Employment_Info_5   Employment_Info_6       InsuredInfo_1       InsuredInfo_2 
                  0               10854                   0                   0 
      InsuredInfo_3       InsuredInfo_4       InsuredInfo_5       InsuredInfo_6 
                  0                   0                   0                   0 
      InsuredInfo_7 Insurance_History_1 Insurance_History_2 Insurance_History_3 
                  0                   0                   0                   0 
Insurance_History_4 Insurance_History_5 Insurance_History_7 Insurance_History_8 
                  0               25396                   0                   0 
Insurance_History_9       Family_Hist_1       Family_Hist_2       Family_Hist_3 
                  0                   0               28656               34241 
      Family_Hist_4       Family_Hist_5   Medical_History_1   Medical_History_2 
              19184               41811                8889                   0 
  Medical_History_3   Medical_History_4   Medical_History_5   Medical_History_6 
                  0                   0                   0                   0 
  Medical_History_7   Medical_History_8   Medical_History_9  Medical_History_10 
                  0                   0                   0               58824 
 Medical_History_11  Medical_History_12  Medical_History_13  Medical_History_14 
                  0                   0                   0                   0 
 Medical_History_15  Medical_History_16  Medical_History_17  Medical_History_18 
              44596                   0                   0                   0 
 Medical_History_19  Medical_History_20  Medical_History_21  Medical_History_22 
                  0                   0                   0                   0 
 Medical_History_23  Medical_History_24  Medical_History_25  Medical_History_26 
                  0               55580                   0                   0 
 Medical_History_27  Medical_History_28  Medical_History_29  Medical_History_30 
                  0                   0                   0                   0 
 Medical_History_31  Medical_History_32  Medical_History_33  Medical_History_34 
                  0               58274                   0                   0 
 Medical_History_35  Medical_History_36  Medical_History_37  Medical_History_38 
                  0                   0                   0                   0 
 Medical_History_39  Medical_History_40  Medical_History_41   Medical_Keyword_1 
                  0                   0                   0                   0 
  Medical_Keyword_2   Medical_Keyword_3   Medical_Keyword_4   Medical_Keyword_5 
                  0                   0                   0                   0 
  Medical_Keyword_6   Medical_Keyword_7   Medical_Keyword_8   Medical_Keyword_9 
                  0                   0                   0                   0 
 Medical_Keyword_10  Medical_Keyword_11  Medical_Keyword_12  Medical_Keyword_13 
                  0                   0                   0                   0 
 Medical_Keyword_14  Medical_Keyword_15  Medical_Keyword_16  Medical_Keyword_17 
                  0                   0                   0                   0 
 Medical_Keyword_18  Medical_Keyword_19  Medical_Keyword_20  Medical_Keyword_21 
                  0                   0                   0                   0 
 Medical_Keyword_22  Medical_Keyword_23  Medical_Keyword_24  Medical_Keyword_25 
                  0                   0                   0                   0 
 Medical_Keyword_26  Medical_Keyword_27  Medical_Keyword_28  Medical_Keyword_29 
                  0                   0                   0                   0 
 Medical_Keyword_30  Medical_Keyword_31  Medical_Keyword_32  Medical_Keyword_33 
                  0                   0                   0                   0 
 Medical_Keyword_34  Medical_Keyword_35  Medical_Keyword_36  Medical_Keyword_37 
                  0                   0                   0                   0 
 Medical_Keyword_38  Medical_Keyword_39  Medical_Keyword_40  Medical_Keyword_41 
                  0                   0                   0                   0 
 Medical_Keyword_42  Medical_Keyword_43  Medical_Keyword_44  Medical_Keyword_45 
                  0                   0                   0                   0 
 Medical_Keyword_46  Medical_Keyword_47  Medical_Keyword_48            Response 
                  0                   0                   0                   0 
# Number of missing values in each column of the test data.
# colSums(is.na()) works on the logical NA mask directly, whereas apply()
# first coerces the whole data frame to a single-type matrix.
colSums(is.na(testData))
                 Id      Product_Info_1      Product_Info_2      Product_Info_3 
                  0                   0                   0                   0 
     Product_Info_4      Product_Info_5      Product_Info_6      Product_Info_7 
                  0                   0                   0                   0 
            Ins_Age                  Ht                  Wt                 BMI 
                  0                   0                   0                   0 
  Employment_Info_1   Employment_Info_2   Employment_Info_3   Employment_Info_4 
                  3                   0                   0                2137 
  Employment_Info_5   Employment_Info_6       InsuredInfo_1       InsuredInfo_2 
                  0                3787                   0                   0 
      InsuredInfo_3       InsuredInfo_4       InsuredInfo_5       InsuredInfo_6 
                  0                   0                   0                   0 
      InsuredInfo_7 Insurance_History_1 Insurance_History_2 Insurance_History_3 
                  0                   0                   0                   0 
Insurance_History_4 Insurance_History_5 Insurance_History_7 Insurance_History_8 
                  0                8105                   0                   0 
Insurance_History_9       Family_Hist_1       Family_Hist_2       Family_Hist_3 
                  0                   0                9880               11064 
      Family_Hist_4       Family_Hist_5   Medical_History_1   Medical_History_2 
               6677               13624                2972                   0 
  Medical_History_3   Medical_History_4   Medical_History_5   Medical_History_6 
                  0                   0                   0                   0 
  Medical_History_7   Medical_History_8   Medical_History_9  Medical_History_10 
                  0                   0                   0               19564 
 Medical_History_11  Medical_History_12  Medical_History_13  Medical_History_14 
                  0                   0                   0                   0 
 Medical_History_15  Medical_History_16  Medical_History_17  Medical_History_18 
              14864                   0                   0                   0 
 Medical_History_19  Medical_History_20  Medical_History_21  Medical_History_22 
                  0                   0                   0                   0 
 Medical_History_23  Medical_History_24  Medical_History_25  Medical_History_26 
                  0               18585                   0                   0 
 Medical_History_27  Medical_History_28  Medical_History_29  Medical_History_30 
                  0                   0                   0                   0 
 Medical_History_31  Medical_History_32  Medical_History_33  Medical_History_34 
                  0               19414                   0                   0 
 Medical_History_35  Medical_History_36  Medical_History_37  Medical_History_38 
                  0                   0                   0                   0 
 Medical_History_39  Medical_History_40  Medical_History_41   Medical_Keyword_1 
                  0                   0                   0                   0 
  Medical_Keyword_2   Medical_Keyword_3   Medical_Keyword_4   Medical_Keyword_5 
                  0                   0                   0                   0 
  Medical_Keyword_6   Medical_Keyword_7   Medical_Keyword_8   Medical_Keyword_9 
                  0                   0                   0                   0 
 Medical_Keyword_10  Medical_Keyword_11  Medical_Keyword_12  Medical_Keyword_13 
                  0                   0                   0                   0 
 Medical_Keyword_14  Medical_Keyword_15  Medical_Keyword_16  Medical_Keyword_17 
                  0                   0                   0                   0 
 Medical_Keyword_18  Medical_Keyword_19  Medical_Keyword_20  Medical_Keyword_21 
                  0                   0                   0                   0 
 Medical_Keyword_22  Medical_Keyword_23  Medical_Keyword_24  Medical_Keyword_25 
                  0                   0                   0                   0 
 Medical_Keyword_26  Medical_Keyword_27  Medical_Keyword_28  Medical_Keyword_29 
                  0                   0                   0                   0 
 Medical_Keyword_30  Medical_Keyword_31  Medical_Keyword_32  Medical_Keyword_33 
                  0                   0                   0                   0 
 Medical_Keyword_34  Medical_Keyword_35  Medical_Keyword_36  Medical_Keyword_37 
                  0                   0                   0                   0 
 Medical_Keyword_38  Medical_Keyword_39  Medical_Keyword_40  Medical_Keyword_41 
                  0                   0                   0                   0 
 Medical_Keyword_42  Medical_Keyword_43  Medical_Keyword_44  Medical_Keyword_45 
                  0                   0                   0                   0 
 Medical_Keyword_46  Medical_Keyword_47  Medical_Keyword_48 
                  0                   0                   0 

Now let’s look at the response variable. The response is an ordinal variable with levels from 1 to 8.

# Response takes the values 1 to 8, so use one bin per value instead of the
# default 30 bins that stat_bin() falls back to.
temp1 <- ggplot(trainingData, aes(x = Response)) +
  geom_histogram(binwidth = 1, fill = 'Blue', alpha = 0.5)
# The plot size is set once in ggplotly(); passing width/height to layout()
# is deprecated.
ggplotly(temp1, color = ~Response, width = 400, height = 200) %>%
  layout(
    title = 'Distribution of Response Variable',
    plot_bgcolor = 'white',
    xaxis = list(gridcolor = 'lightgrey', opacity = 0.2),
    yaxis = list(gridcolor = 'lightgrey', opacity = 0.2),
    autosize = FALSE
  )

Based on the plot above, Response = 8 is the most common class and Response = 3 is the least common. Next, let’s look at how missing values relate to the response.

# Missing-value counts per column, computed separately for each Response level.
# Result: one row per column of trainingData, one column per Response level
# (levels in ascending order).
# vapply() fixes the result shape up front; colSums(is.na()) avoids the
# matrix coercion that apply() performs on a data frame.
noofNAperResponseType <- vapply(
  sort(unique(trainingData$Response)),
  function(level) {
    colSums(is.na(trainingData[trainingData$Response == level, , drop = FALSE]))
  },
  numeric(ncol(trainingData))
)
noofNAperResponseType
                    [,1] [,2] [,3] [,4] [,5]  [,6] [,7]  [,8]
Id                     0    0    0    0    0     0    0     0
Product_Info_1         0    0    0    0    0     0    0     0
Product_Info_2         0    0    0    0    0     0    0     0
Product_Info_3         0    0    0    0    0     0    0     0
Product_Info_4         0    0    0    0    0     0    0     0
Product_Info_5         0    0    0    0    0     0    0     0
Product_Info_6         0    0    0    0    0     0    0     0
Product_Info_7         0    0    0    0    0     0    0     0
Ins_Age                0    0    0    0    0     0    0     0
Ht                     0    0    0    0    0     0    0     0
Wt                     0    0    0    0    0     0    0     0
BMI                    0    0    0    0    0     0    0     0
Employment_Info_1     16    0    0    0    0     3    0     0
Employment_Info_2      0    0    0    0    0     0    0     0
Employment_Info_3      0    0    0    0    0     0    0     0
Employment_Info_4    789  679   85  148  560  1752  883  1883
Employment_Info_5      0    0    0    0    0     0    0     0
Employment_Info_6   1171 1194  220  333 1221  1688 1365  3662
InsuredInfo_1          0    0    0    0    0     0    0     0
InsuredInfo_2          0    0    0    0    0     0    0     0
InsuredInfo_3          0    0    0    0    0     0    0     0
InsuredInfo_4          0    0    0    0    0     0    0     0
InsuredInfo_5          0    0    0    0    0     0    0     0
InsuredInfo_6          0    0    0    0    0     0    0     0
InsuredInfo_7          0    0    0    0    0     0    0     0
Insurance_History_1    0    0    0    0    0     0    0     0
Insurance_History_2    0    0    0    0    0     0    0     0
Insurance_History_3    0    0    0    0    0     0    0     0
Insurance_History_4    0    0    0    0    0     0    0     0
Insurance_History_5 2848 2710  565  725 2372  4369 2901  8906
Insurance_History_7    0    0    0    0    0     0    0     0
Insurance_History_8    0    0    0    0    0     0    0     0
Insurance_History_9    0    0    0    0    0     0    0     0
Family_Hist_1          0    0    0    0    0     0    0     0
Family_Hist_2       4049 3891  479  551 2840  5628 4354  6864
Family_Hist_3       2709 3101  611  971 3060  6170 4073 13546
Family_Hist_4       3099 2744  303  355 1929  3679 2994  4081
Family_Hist_5       3373 4017  745 1119 3701  7835 5221 15800
Medical_History_1    694  694  231  311  796  1744  998  3421
Medical_History_2      0    0    0    0    0     0    0     0
Medical_History_3      0    0    0    0    0     0    0     0
Medical_History_4      0    0    0    0    0     0    0     0
Medical_History_5      0    0    0    0    0     0    0     0
Medical_History_6      0    0    0    0    0     0    0     0
Medical_History_7      0    0    0    0    0     0    0     0
Medical_History_8      0    0    0    0    0     0    0     0
Medical_History_9      0    0    0    0    0     0    0     0
Medical_History_10  6106 6468 1003 1419 5379 11042 7980 19427
Medical_History_11     0    0    0    0    0     0    0     0
Medical_History_12     0    0    0    0    0     0    0     0
Medical_History_13     0    0    0    0    0     0    0     0
Medical_History_14     0    0    0    0    0     0    0     0
Medical_History_15  3981 4518  111  154 4241  8147 6428 17016
Medical_History_16     0    0    0    0    0     0    0     0
Medical_History_17     0    0    0    0    0     0    0     0
Medical_History_18     0    0    0    0    0     0    0     0
Medical_History_19     0    0    0    0    0     0    0     0
Medical_History_20     0    0    0    0    0     0    0     0
Medical_History_21     0    0    0    0    0     0    0     0
Medical_History_22     0    0    0    0    0     0    0     0
Medical_History_23     0    0    0    0    0     0    0     0
Medical_History_24  5755 6018  928 1327 5078 10047 7580 18847
Medical_History_25     0    0    0    0    0     0    0     0
Medical_History_26     0    0    0    0    0     0    0     0
Medical_History_27     0    0    0    0    0     0    0     0
Medical_History_28     0    0    0    0    0     0    0     0
Medical_History_29     0    0    0    0    0     0    0     0
Medical_History_30     0    0    0    0    0     0    0     0
Medical_History_31     0    0    0    0    0     0    0     0
Medical_History_32  6059 6413  957 1352 5338 10690 7999 19466
Medical_History_33     0    0    0    0    0     0    0     0
Medical_History_34     0    0    0    0    0     0    0     0
Medical_History_35     0    0    0    0    0     0    0     0
Medical_History_36     0    0    0    0    0     0    0     0
Medical_History_37     0    0    0    0    0     0    0     0
Medical_History_38     0    0    0    0    0     0    0     0
Medical_History_39     0    0    0    0    0     0    0     0
Medical_History_40     0    0    0    0    0     0    0     0
Medical_History_41     0    0    0    0    0     0    0     0
Medical_Keyword_1      0    0    0    0    0     0    0     0
Medical_Keyword_2      0    0    0    0    0     0    0     0
Medical_Keyword_3      0    0    0    0    0     0    0     0
Medical_Keyword_4      0    0    0    0    0     0    0     0
Medical_Keyword_5      0    0    0    0    0     0    0     0
Medical_Keyword_6      0    0    0    0    0     0    0     0
Medical_Keyword_7      0    0    0    0    0     0    0     0
Medical_Keyword_8      0    0    0    0    0     0    0     0
Medical_Keyword_9      0    0    0    0    0     0    0     0
Medical_Keyword_10     0    0    0    0    0     0    0     0
Medical_Keyword_11     0    0    0    0    0     0    0     0
Medical_Keyword_12     0    0    0    0    0     0    0     0
Medical_Keyword_13     0    0    0    0    0     0    0     0
Medical_Keyword_14     0    0    0    0    0     0    0     0
Medical_Keyword_15     0    0    0    0    0     0    0     0
Medical_Keyword_16     0    0    0    0    0     0    0     0
Medical_Keyword_17     0    0    0    0    0     0    0     0
Medical_Keyword_18     0    0    0    0    0     0    0     0
Medical_Keyword_19     0    0    0    0    0     0    0     0
Medical_Keyword_20     0    0    0    0    0     0    0     0
Medical_Keyword_21     0    0    0    0    0     0    0     0
Medical_Keyword_22     0    0    0    0    0     0    0     0
Medical_Keyword_23     0    0    0    0    0     0    0     0
Medical_Keyword_24     0    0    0    0    0     0    0     0
Medical_Keyword_25     0    0    0    0    0     0    0     0
Medical_Keyword_26     0    0    0    0    0     0    0     0
Medical_Keyword_27     0    0    0    0    0     0    0     0
Medical_Keyword_28     0    0    0    0    0     0    0     0
Medical_Keyword_29     0    0    0    0    0     0    0     0
Medical_Keyword_30     0    0    0    0    0     0    0     0
Medical_Keyword_31     0    0    0    0    0     0    0     0
Medical_Keyword_32     0    0    0    0    0     0    0     0
Medical_Keyword_33     0    0    0    0    0     0    0     0
Medical_Keyword_34     0    0    0    0    0     0    0     0
Medical_Keyword_35     0    0    0    0    0     0    0     0
Medical_Keyword_36     0    0    0    0    0     0    0     0
Medical_Keyword_37     0    0    0    0    0     0    0     0
Medical_Keyword_38     0    0    0    0    0     0    0     0
Medical_Keyword_39     0    0    0    0    0     0    0     0
Medical_Keyword_40     0    0    0    0    0     0    0     0
Medical_Keyword_41     0    0    0    0    0     0    0     0
Medical_Keyword_42     0    0    0    0    0     0    0     0
Medical_Keyword_43     0    0    0    0    0     0    0     0
Medical_Keyword_44     0    0    0    0    0     0    0     0
Medical_Keyword_45     0    0    0    0    0     0    0     0
Medical_Keyword_46     0    0    0    0    0     0    0     0
 [ reached getOption("max.print") -- omitted 3 rows ]
# Share of all missing values that falls in each Response level, to 2 decimals.
round(prop.table(colSums(noofNAperResponseType)), digits = 2)
[1] 0.10 0.11 0.02 0.02 0.09 0.19 0.13 0.34

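From this we see that rows with Response = 8 account for the largest share of missing values, whereas rows with Response = 3 account for the smallest. Note that these shares are not normalised by class size: Response = 8 is also the most common class and Response = 3 the least common, so the shares largely mirror the class frequencies. Now let’s look for duplicated rows and constant columns.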

# duplicated() flags every row that repeats an earlier row, so its sum is the
# number of duplicated rows.
cat('Number of duplicated rows in Training dataset = ', sum(duplicated(trainingData)), '\n')
Number of duplicated rows in Training dataset =  0 
# A data frame is iterated column by column, so this counts constant COLUMNS
# (columns holding a single distinct value), not rows.
cat('Number of constant columns in Training dataset = ', sum(vapply(trainingData, function(x) length(unique(x)) == 1, logical(1))), '\n')
Number of constant columns in Training dataset =  0 
# NOTE(review): a column-wise dplyr equivalent would be
# summarise(across(everything(), n_distinct)) -- confirm the installed dplyr provides across().
cat('Number of duplicated rows in Test dataset = ', sum(duplicated(testData)), '\n')
Number of duplicated rows in Test dataset =  0 
cat('Number of constant columns in Test dataset = ', sum(vapply(testData, function(x) length(unique(x)) == 1, logical(1))), '\n')
Number of constant columns in Test dataset =  0 

Now that we have looked at missing values and checked for duplicated rows and constant columns, let’s look at the individual data types and visualize their distributions.

Plot densities of continuous features

# Density of the first two continuous features, reshaped to long key/value form
# so both are drawn on one plot. The columns are selected by name instead of by
# position (1 and 2 in continuousVarNames) so a reordering cannot change the plot.
tempDataset <- train_continuous %>%
  select(Product_Info_4, Ins_Age) %>%
  gather()
# alpha is a constant setting, so it goes outside aes(); inside aes() it is
# treated as a mapped variable and leaks into the legend.
tempdenPlot <- ggplot(data = tempDataset) +
  geom_density(mapping = aes(x = value, fill = key), alpha = 0.5) +
  labs(title = 'Density Plots')
# The plot size is given once, in ggplotly(); passing width/height to layout()
# is deprecated and triggered a warning on every render.
ggplotly(tempdenPlot, height = 600, width = 800) %>%
  layout(plot_bgcolor = 'transparent', paper_bgcolor = 'transparent', autosize = FALSE)
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

# Density of the physical attributes (height, weight, BMI) in long key/value form.
physicalAttDataset <- train_continuous %>%
  select(Ht, Wt, BMI) %>%
  gather()
# alpha is a constant setting, so it goes outside aes(); inside aes() it is
# treated as a mapped variable and leaks into the legend.
tempdenPlot <- ggplot(data = physicalAttDataset) +
  geom_density(mapping = aes(x = value, fill = key), alpha = 0.5) +
  labs(title = 'Density Plots of Physical Characteristics')
# The plot size is given once, in ggplotly(); passing width/height to layout()
# is deprecated and triggered a warning on every render.
ggplotly(tempdenPlot, height = 600, width = 800) %>%
  layout(plot_bgcolor = 'transparent', paper_bgcolor = 'transparent', autosize = FALSE)
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

# Density of Employment_Info_1 and Employment_Info_6 in long key/value form.
# Missing values are dropped by the density stat (ggplot2 reports how many).
employAttDataset <- train_continuous %>%
  select(Employment_Info_1, Employment_Info_6) %>%
  gather()
# alpha is a constant setting, so it goes outside aes(); inside aes() it is
# treated as a mapped variable and leaks into the legend.
tempdenPlot <- ggplot(data = employAttDataset) +
  geom_density(mapping = aes(x = value, fill = key), alpha = 0.5) +
  labs(title = 'Density Plots of Employment Info 1 and 6')
# The plot size is given once, in ggplotly(); passing width/height to layout()
# is deprecated and triggered a warning on every render.
ggplotly(tempdenPlot, height = 600, width = 800) %>%
  layout(plot_bgcolor = 'transparent', paper_bgcolor = 'transparent', autosize = FALSE)
Removed 10873 rows containing non-finite values (stat_density).Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

# Density of Employment_Info_4 on its own plot, in long key/value form.
# Missing values are dropped by the density stat (ggplot2 reports how many).
employDataset <- train_continuous %>%
  select(Employment_Info_4) %>%
  gather()
# alpha is a constant setting, so it goes outside aes(); inside aes() it is
# treated as a mapped variable and leaks into the legend.
tempdenPlot <- ggplot(data = employDataset) +
  geom_density(mapping = aes(x = value, fill = key), alpha = 0.5) +
  labs(title = 'Density Plots of Employment Info 4')
# The plot size is given once, in ggplotly(); passing width/height to layout()
# is deprecated and triggered a warning on every render.
ggplotly(tempdenPlot, height = 600, width = 800) %>%
  layout(plot_bgcolor = 'transparent', paper_bgcolor = 'transparent', autosize = FALSE)
Removed 6779 rows containing non-finite values (stat_density).Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

# Density of every continuous Family_Hist_* feature in long key/value form.
# Missing values are dropped by the density stat (ggplot2 reports how many).
familyHistDataset <- train_continuous %>%
  select(starts_with('Family_Hist_')) %>%
  gather()
# alpha is a constant setting, so it goes outside aes(); inside aes() it is
# treated as a mapped variable and leaks into the legend.
tempdenPlot <- ggplot(data = familyHistDataset) +
  geom_density(mapping = aes(x = value, fill = key), alpha = 0.5) +
  labs(title = 'Density Plots of Family History')
# The plot size is given once, in ggplotly(); passing width/height to layout()
# is deprecated and triggered a warning on every render.
ggplotly(tempdenPlot, height = 600, width = 800) %>%
  layout(plot_bgcolor = 'transparent', paper_bgcolor = 'transparent', autosize = FALSE)
Removed 123892 rows containing non-finite values (stat_density).Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

# Density of Insurance_History_5, selected by name instead of by position
# (9 in continuousVarNames) so a reordering cannot change the plot.
# Missing values are dropped by the density stat (ggplot2 reports how many).
insurHistDataset <- train_continuous %>%
  select(Insurance_History_5) %>%
  gather()
# alpha is a constant setting, so it goes outside aes(); inside aes() it is
# treated as a mapped variable and leaks into the legend.
tempdenPlot <- ggplot(data = insurHistDataset) +
  geom_density(mapping = aes(x = value, fill = key), alpha = 0.5) +
  labs(title = 'Density Plots')
# The plot size is given once, in ggplotly(); passing width/height to layout()
# is deprecated and triggered a warning on every render.
# NOTE(review): the original asked for width = 1000 in ggplotly() but
# width = 800 in layout(). 800 x 800 is kept on the assumption that the later
# layout() value was the one in effect - confirm the intended size.
ggplotly(tempdenPlot, height = 800, width = 800) %>%
  layout(plot_bgcolor = 'transparent', paper_bgcolor = 'transparent', autosize = FALSE)
Removed 25396 rows containing non-finite values (stat_density).Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

Plot Boxplots of Continuous Variables

# Box plots of Product_Info_4, Ins_Age and Insurance_History_5, one box per
# feature. The columns are selected by name instead of by position
# (1, 2 and 9 in continuousVarNames) so a reordering cannot change the plot.
tempDataset <- train_continuous %>%
  select(Product_Info_4, Ins_Age, Insurance_History_5) %>%
  gather()
plot_ly(data = tempDataset, type = 'box', split = ~key, y = ~value) %>%
  layout(title = 'Box Plots')
Ignoring 25396 observationsIgnoring 25396 observations

# One box per physical attribute, reusing the long-form data built for the density plot.
layout(
  plot_ly(data = physicalAttDataset, type = 'box', split = ~key, y = ~value),
  title = 'Box Plots of Physical Characteristics'
)

# One box per family-history feature, reusing the long-form data built for the density plot.
layout(
  plot_ly(data = familyHistDataset, type = 'box', split = ~key, y = ~value),
  title = 'Box Plots of Family History Attributes'
)
Ignoring 123892 observationsIgnoring 123892 observations

# Long key/value form of every continuous Employment_Info_* feature.
employInfoData <- gather(select(train_continuous, starts_with('Employment_Info_')))
# One box per employment feature.
layout(
  plot_ly(data = employInfoData, type = 'box', split = ~key, y = ~value),
  title = 'Box Plots of Employment Information Attributes'
)
Ignoring 17652 observationsIgnoring 17652 observations

Histograms of Categorical Variables

# Plot the distribution of each categorical feature, one small interactive plot
# per column, walking the feature families in the order listed below.
subSelectionVals <- c('Product_Info', 'Employment_Info', 'InsuredInfo', 'Insurance_History', 'Family_Hist', 'Medical_History')
for (i in subSelectionVals) {
  # Columns of the current family.
  tempCatDataset <- train_categorical %>% select(starts_with(i))
  tempcolNames <- names(tempCatDataset)
  for (j in tempcolNames) {
    # These two columns were excluded by the original analysis; the reason is
    # not recorded here.
    if (j %in% c('Product_Info_2', 'Medical_History_10')) next
    # geom_bar() counts each distinct category code. The previous
    # geom_histogram() spread the codes over 30 arbitrary bins, which merges
    # or splits categories and warned about the bin count on every plot.
    tempCatPlot <- ggplot(data = train_categorical) +
      geom_bar(mapping = aes_string(x = j), fill = 'Blue', alpha = 0.5)
    # The plot size is given once, in ggplotly(); passing width/height to
    # layout() is deprecated. The former `color = ~j` argument is dropped.
    # NOTE(review): it is not a documented ggplotly() argument and looks
    # unused - confirm.
    tempCatPlot <- ggplotly(tempCatPlot, width = 400, height = 200) %>%
      layout(
        title = 'Distribution of Categorical Variable',
        plot_bgcolor = 'white',
        xaxis = list(gridcolor = 'lightgrey', opacity = 0.2),
        yaxis = list(gridcolor = 'lightgrey', opacity = 0.2),
        autosize = FALSE
      )
    print(tempCatPlot)
  }
}
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()

From the density plots it looks like Employment_Info_6 has a very similar histogram to the response variable. It’s a good indication that this variable may be a good predictor of the response variable.

Now let’s explore the relationships and any collinearity that exist between the variables. First let’s plot a correlation matrix of all continuous and categorical variables.

# Pearson correlation heatmap over all categorical and continuous features.
corInputVarNames <- c(categoricalVarNames, continuousVarNames)
# Product_Info_2 is dropped before calling cor(). Columns are picked by plain
# name indexing, which avoids passing a bare external vector to select().
corInputData <- trainingData[, setdiff(corInputVarNames, 'Product_Info_2')]
# Several continuous features contain missing values; with cor()'s default
# handling every correlation involving them would be NA, so each pair is
# computed from the rows where both values are present.
cor1 <- cor(corInputData, use = 'pairwise.complete.obs')
# Long form (Var1, Var2, value) for geom_tile().
corDatalongform <- melt(cor1)
ggplot(data = corDatalongform, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = 'white') +
  scale_fill_gradient2(low = 'blue', high = 'red', mid = 'white', midpoint = 0, limits = c(-1, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
  ggtitle('Pearson Correlation Matrix excl. Discrete Variables (Medical Keywords)')

From this we see some interesting observations: i. Medical_History_25 and Medical_History_26 show negative correlation. ii. Variable 7 is strongly correlated with variables 18 and 14.

Now let’s look at detail by subsetting the data to identify correlation that exist between bins of input data ie types like Medical History, Product Info etc.

# Pearson correlation heatmap of the Medical_History features plus Response,
# leaving out Medical_History_10, _24 and _32.
medHistData <- trainingData %>%
  select(starts_with('Medical_History'), Response, -c(Medical_History_10, Medical_History_24, Medical_History_32))
# Pairwise-complete rows keep a correlation defined even if a remaining column
# has missing values; without missing values the result is unchanged.
cormedHist <- cor(medHistData, use = 'pairwise.complete.obs')
# Long form (Var1, Var2, value) for geom_tile().
corMedHistlongform <- melt(cormedHist)
ggplot(data = corMedHistlongform, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = 'white') +
  scale_fill_gradient2(low = 'blue', high = 'red', mid = 'white', midpoint = 0, limits = c(-1, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
  ggtitle('Pearson Correlation Matrix for Medical History Variables')

From this graph we can see that: i. Medical_History_25 and Medical_History_26 show negative correlation ii. Medical_History_36 is positively correlated to Medical_History_15 and negatively correlated to Medical_History_26 iii. Medical_History_7 is strongly correlated with Medical_History_8 and Medical_History_14

# Pearson correlation heatmap of age and the physical attributes against Response.
physicalData <- trainingData %>% select(Ins_Age, BMI, Ht, Wt, Response)
# Pairwise-complete rows match the other correlation plots; without missing
# values the result is the same as the default.
corPhyData <- cor(physicalData, use = 'pairwise.complete.obs')
# Long form (Var1, Var2, value) for geom_tile().
corPhyDatalongForm <- melt(corPhyData)
ggplot(data = corPhyDatalongForm, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = 'white') +
  scale_fill_gradient2(low = 'blue', high = 'red', mid = 'white', midpoint = 0, limits = c(-1, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
  ggtitle('Pearson Correlation Matrix for Physical Attributes')

From this graph it’s interesting to see that Response is negatively correlated with Wt/BMI and Ins_Age.

# Pearson correlation heatmap of the Insurance_History features against Response.
insData <- trainingData %>% select(starts_with('Insurance_History'), Response)
# Insurance_History_5 contains missing values; with cor()'s default handling
# its whole row and column would be NA, so each pair is computed from the rows
# where both values are present.
corinsData <- cor(insData, use = 'pairwise.complete.obs')
# Long form (Var1, Var2, value) for geom_tile().
corinsDatalongform <- melt(corinsData)
ggplot(data = corinsDatalongform, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = 'white') +
  scale_fill_gradient2(low = 'blue', high = 'red', mid = 'white', midpoint = 0, limits = c(-1, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
  ggtitle('Pearson Correlation Matrix for Insurance History Attributes')

From this graph its interesting to see that Response is slightly negatively correlated to Insurance_History_2 variable. Insurance_History_3 is negatively correlated to 4, 7, 9. 4 is positively correlated to 7 and 9, 1. 7 is positively correlated 4, 1, 9.

# Pearson correlation heatmap of the Product_Info features (without
# Product_Info_2) against Response.
productData <- trainingData %>% select(starts_with('Product_Info'), Response, -Product_Info_2)
# Pairwise-complete rows match the other correlation plots; without missing
# values the result is the same as the default.
corProductData <- cor(productData, use = 'pairwise.complete.obs')
# Long form (Var1, Var2, value) for geom_tile().
corProductDatalongform <- melt(corProductData)
ggplot(data = corProductDatalongform, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = 'white') +
  scale_fill_gradient2(low = 'blue', high = 'red', mid = 'white', midpoint = 0, limits = c(-1, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
  ggtitle('Pearson Correlation Matrix for Product Information Attributes')

After plotting the correlation matrix for all variables and their subtypes, we now move on to fitting models. First we need to handle missing values. For this analysis we drop columns which have missing values.

# Drop every column that contains a missing value, separately for each dataset.
updatedTrainingData <- trainingData[, colSums(is.na(trainingData)) == 0]
updatedTestData <- testData[, colSums(is.na(testData)) == 0]
# Model features are the columns that survived in BOTH datasets, minus the
# identifier and the target. Taking them from the training data alone (by
# position) would fail later if a column were complete in train but not in test.
# NOTE(review): the original took columns 2..(n-1), i.e. it assumed Id is the
# first and Response the last training column - confirm.
featureNames <- setdiff(
  intersect(names(updatedTrainingData), names(updatedTestData)),
  c('Id', 'Response')
)
# Encode character features as integer codes. The level set is built from both
# datasets so the same string gets the same code in train and test.
for (k in featureNames) {
  if (is.character(updatedTrainingData[[k]])) {
    level <- unique(c(updatedTrainingData[[k]], updatedTestData[[k]]))
    updatedTrainingData[[k]] <- as.integer(factor(updatedTrainingData[[k]], levels = level))
    updatedTestData[[k]] <- as.integer(factor(updatedTestData[[k]], levels = level))
  }
}

Now that the data is cleaned, let’s fit a model.

# Fit a gradient-boosted regression on the complete-case features, treating the
# Response as a numeric target and reporting training RMSE per round.
# The tree depth parameter is `max_depth`; the former `depth = 10` is not an
# xgboost parameter, so trees were grown with the default depth instead.
# NOTE(review): newer xgboost releases rename 'reg:linear' to
# 'reg:squarederror'; kept as-is for compatibility with the installed version.
xgbModel <- xgboost(
  data = data.matrix(updatedTrainingData[, featureNames]),
  label = updatedTrainingData$Response,
  eta = 0.025,
  max_depth = 10,
  nrounds = 100,
  objective = 'reg:linear',
  eval_metric = 'rmse'
)
[1] train-rmse:5.570419 
[2] train-rmse:5.450238 
[3] train-rmse:5.333451 
[4] train-rmse:5.220026 
[5] train-rmse:5.109812 
[6] train-rmse:5.002814 
[7] train-rmse:4.898862 
[8] train-rmse:4.797959 
[9] train-rmse:4.700017 
[10]    train-rmse:4.604948 
[11]    train-rmse:4.512718 
[12]    train-rmse:4.423228 
[13]    train-rmse:4.336429 
[14]    train-rmse:4.252247 
[15]    train-rmse:4.170590 
[16]    train-rmse:4.091464 
[17]    train-rmse:4.014576 
[18]    train-rmse:3.940206 
[19]    train-rmse:3.868004 
[20]    train-rmse:3.798201 
[21]    train-rmse:3.730433 
[22]    train-rmse:3.664940 
[23]    train-rmse:3.601333 
[24]    train-rmse:3.539867 
[25]    train-rmse:3.480426 
[26]    train-rmse:3.422746 
[27]    train-rmse:3.366975 
[28]    train-rmse:3.313237 
[29]    train-rmse:3.261140 
[30]    train-rmse:3.210736 
[31]    train-rmse:3.162163 
[32]    train-rmse:3.115008 
[33]    train-rmse:3.069538 
[34]    train-rmse:3.025833 
[35]    train-rmse:2.983483 
[36]    train-rmse:2.942677 
[37]    train-rmse:2.903225 
[38]    train-rmse:2.865234 
[39]    train-rmse:2.828328 
[40]    train-rmse:2.793048 
[41]    train-rmse:2.758697 
[42]    train-rmse:2.725726 
[43]    train-rmse:2.693974 
[44]    train-rmse:2.663177 
[45]    train-rmse:2.633579 
[46]    train-rmse:2.605337 
[47]    train-rmse:2.577870 
[48]    train-rmse:2.551438 
[49]    train-rmse:2.526129 
[50]    train-rmse:2.501600 
[51]    train-rmse:2.478041 
[52]    train-rmse:2.455495 
[53]    train-rmse:2.433679 
[54]    train-rmse:2.412680 
[55]    train-rmse:2.392509 
[56]    train-rmse:2.373122 
[57]    train-rmse:2.354504 
[58]    train-rmse:2.336630 
[59]    train-rmse:2.319384 
[60]    train-rmse:2.302925 
[61]    train-rmse:2.287008 
[62]    train-rmse:2.271823 
[63]    train-rmse:2.257182 
[64]    train-rmse:2.243029 
[65]    train-rmse:2.229513 
[66]    train-rmse:2.216529 
[67]    train-rmse:2.203969 
[68]    train-rmse:2.191843 
[69]    train-rmse:2.180212 
[70]    train-rmse:2.169224 
[71]    train-rmse:2.158580 
[72]    train-rmse:2.148142 
[73]    train-rmse:2.138285 
[74]    train-rmse:2.128945 
[75]    train-rmse:2.119676 
[76]    train-rmse:2.110812 
[77]    train-rmse:2.102316 
[78]    train-rmse:2.094156 
[79]    train-rmse:2.086369 
[80]    train-rmse:2.078901 
[81]    train-rmse:2.071867 
[82]    train-rmse:2.064955 
[83]    train-rmse:2.058130 
[84]    train-rmse:2.051683 
[85]    train-rmse:2.045707 
[86]    train-rmse:2.039685 
[87]    train-rmse:2.033930 
[88]    train-rmse:2.028527 
[89]    train-rmse:2.023332 
[90]    train-rmse:2.018152 
[91]    train-rmse:2.013489 
[92]    train-rmse:2.008748 
[93]    train-rmse:2.004300 
[94]    train-rmse:1.999893 
[95]    train-rmse:1.995622 
[96]    train-rmse:1.991551 
[97]    train-rmse:1.987637 
[98]    train-rmse:1.983850 
[99]    train-rmse:1.980297 
[100]   train-rmse:1.976910 
# Feature importance of the fitted model, plotted for the top 10 and top 30
# entries of the importance table.
impMatrix <- xgb.importance(model = xgbModel)
# head() returns at most the requested number of rows; indexing with 1:10 or
# 1:30 would pad the table with NA rows when fewer features are reported.
impMatrixtop10 <- head(impMatrix, 10)
xgb.plot.importance(importance_matrix = impMatrixtop10)

impMatrixtop30 <- head(impMatrix, 30)
xgb.plot.importance(importance_matrix = impMatrixtop30)

# Build the submission table: one predicted Response per test Id.
submissionData <- data.frame(Id = updatedTestData$Id)
# The model is a regression, so raw predictions are real numbers that can fall
# outside the observed Response levels. Round to the nearest level, then clamp
# to the range seen in the training data so every prediction is a valid level.
responseRange <- range(updatedTrainingData$Response)
rawPrediction <- predict(xgbModel, data.matrix(updatedTestData[, featureNames]))
submissionData$Response <- pmin(pmax(round(rawPrediction), responseRange[1]), responseRange[2])

Questions: 1. A refined statement of the model’s purpose and application from project Component 1. The main purpose of model development was to look for parameters that have a strong influence on the final decision associated with a life insurance application.

  1. A refined statement of the data used in the model, as well as transformations and abstractions you performed. In order to achieve this, an Extreme Gradient Boosting (XGBoost) model was fit on the training dataset. Before the XGBoost model was fit, the data was scrubbed to discard variables (columns) with NULL values. Further, variables of character type (Product_Info_2) were coded with numeric IDs based on the assumption that they are categorical. In addition to this, during the exploratory data analysis phase several transformation/abstraction techniques were used (gather, calculating the percentage of NULL values for each response type, etc.).

  2. A refined statement about its validity and generalizability. The parameters selected by the XGBoost model as having a strong influence on the decision of a life insurance application makes sense. We can see Physical attributes (BMI, Age) and Medical History have the strongest influence. In terms of generalizability it seems to make sense that Medical background and Physical attributes of a person have strong influence on life insurance application decision.

  3. A refined graphical depiction of the most revealing and interesting element of the model. When we expand our selection of variable importance to the top 30 variables that influence the life insurance decision, it was interesting to see that most of the applicant employment information hardly mattered. In fact, only one of the employment parameters made it to the top 30 list.

LS0tDQp0aXRsZTogIlBydWRlbnRpYWwgTGlmZSBJbnN1cmFuY2UgRGF0YSBBbmFseXNpcyINCkF1dGhvcjogJ1N1bXVraCBSYW1lc2gnDQpEYXRlOiAnTm92ZW1iZXIgMjV0aCwgMjAxOCcNCkxhc3QgVXBkYXRlOiAnJw0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KUmVhZGluZyBQcnVkZW50aWFsIExpZmUgSW5zdXJhbmNlIERhdGENCg0KYGBge3IsIGVjaG8gPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5ncyA9IEZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KHBsb3RseSkNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHNraW1yKQ0KbGlicmFyeShnZ3RoZW1lcykNCmxpYnJhcnkoZ3JpZEV4dHJhKQ0KbGlicmFyeShnZ2ZvcmNlKQ0KbGlicmFyeShjYXIpDQpsaWJyYXJ5KHJlc2hhcGUyKQ0KbGlicmFyeSh4Z2Jvb3N0KQ0KbGlicmFyeShjYXJldCkNCg0KdHJhaW5pbmdEYXRhID0gcmVhZF9jc3YoJ3RyYWluLmNzdicpDQp0ZXN0RGF0YSA9IHJlYWRfY3N2KCd0ZXN0LmNzdicpDQpzZXQuc2VlZCgxMCkNCg0KYGBgDQoNClF1aWNrIFNuYXBzaG90IG9mIERhdGEgZm9sbG93ZWQgYnkgc2VwZXJhdGluZyBpbmRpdmlkdWFsIHZhcmlhYmxlIHR5cGVzDQpgYGB7cn0NCm5hbWVzKHRyYWluaW5nRGF0YSkNCmhlYWQodHJhaW5pbmdEYXRhKQ0KaGVhZCh0ZXN0RGF0YSkNCg0KY2F0ZWdvcmljYWxWYXJOYW1lcyA9IGMocGFzdGUoJ1Byb2R1Y3RfSW5mb18nLCBjKDE6Myw1OjcpLCBzZXA9JycpLCANCiAgICAgICAgICAgICAgICAgICAgICBwYXN0ZSgnRW1wbG95bWVudF9JbmZvXycsIGMoMiwzLDUpLCBzZXA9IiIpLCANCiAgICAgICAgICAgICAgICAgICAgICBwYXN0ZSgnSW5zdXJlZEluZm9fJywgMTo3LCBzZXA9JycpLCANCiAgICAgICAgICAgICAgICAgICAgICBwYXN0ZSgnSW5zdXJhbmNlX0hpc3RvcnlfJywgYygxOjQsNzo5KSwgc2VwPScnKSwNCiAgICAgICAgICAgICAgICAgICAgICAnRmFtaWx5X0hpc3RfMScsIA0KICAgICAgICAgICAgICAgICAgICAgIHBhc3RlKCdNZWRpY2FsX0hpc3RvcnlfJywgYygyOjE0LCAxNjoyMywgMjU6MzEsIDMzOjQxKSwgc2VwPScnKSkNCg0KY29udGludW91c1Zhck5hbWVzID0gYygnUHJvZHVjdF9JbmZvXzQnLCANCiAgICAgICAgICAgICAgICAgICAgICAgJ0luc19BZ2UnLCANCiAgICAgICAgICAgICAgICAgICAgICAgJ0h0JywgDQogICAgICAgICAgICAgICAgICAgICAgICdXdCcsIA0KICAgICAgICAgICAgICAgICAgICAgICAnQk1JJywgDQogICAgICAgICAgICAgICAgICAgICAgICdFbXBsb3ltZW50X0luZm9fMScsIA0KICAgICAgICAgICAgICAgICAgICAgICAnRW1wbG95bWVudF9JbmZvXzQnLCANCiAgICAgICAgICAgICAgICAgICAgICAgJ0VtcGxveW1lbnRfSW5mb182JywgDQogICAgICAgICAgICAgICAgICAgICAgICdJbnN1cmFuY2VfSGlzdG9yeV81JywgDQogICAgICAgICAgICAgICAgICAgICAgICdGYW1pbHlfSGlzdF8y
JywgDQogICAgICAgICAgICAgICAgICAgICAgICdGYW1pbHlfSGlzdF8zJywgDQogICAgICAgICAgICAgICAgICAgICAgICdGYW1pbHlfSGlzdF80JywgDQogICAgICAgICAgICAgICAgICAgICAgICdGYW1pbHlfSGlzdF81JykNCg0KZGlzY3JldGVWYXJOYW1lcyA9IGMoJ01lZGljYWxfSGlzdG9yeV8xJywgDQogICAgICAgICAgICAgICAgICAgICAnTWVkaWNhbF9IaXN0b3J5XzE1JywgDQogICAgICAgICAgICAgICAgICAgICAnTWVkaWNhbF9IaXN0b3J5XzI0JywgDQogICAgICAgICAgICAgICAgICAgICAnTWVkaWNhbF9IaXN0b3J5XzMyJywgDQogICAgICAgICAgICAgICAgICAgICBwYXN0ZSgnTWVkaWNhbF9LZXl3b3JkXycsIDE6NDgsIHNlcD0nJykpDQoNCnRyYWluX2NhdGVnb3JpY2FsID0gdHJhaW5pbmdEYXRhICU+JSBzZWxlY3QoY2F0ZWdvcmljYWxWYXJOYW1lcykNCnRlc3RfY2F0ZWdvcmljYWwgPSB0ZXN0RGF0YSAlPiUgc2VsZWN0KGNhdGVnb3JpY2FsVmFyTmFtZXMpDQoNCnRyYWluX2NvbnRpbnVvdXMgPSB0cmFpbmluZ0RhdGEgJT4lIHNlbGVjdChjb250aW51b3VzVmFyTmFtZXMpDQp0ZXN0X2NvbnRpbnVvdXMgPSB0ZXN0RGF0YSAlPiUgc2VsZWN0KGNvbnRpbnVvdXNWYXJOYW1lcykNCg0KdHJhaW5fZGlzY3JldGUgPSB0cmFpbmluZ0RhdGEgJT4lIHNlbGVjdChkaXNjcmV0ZVZhck5hbWVzKQ0KdGVzdF9kaXNjcmV0ZSA9IHRlc3REYXRhICU+JSBzZWxlY3QoZGlzY3JldGVWYXJOYW1lcykNCg0KYGBgDQoNCkxldCdzIHRha2UgYSBsb29rIGF0IHRoZSBjYXRlZ29yaWNhbCB2YXJpYWJsZSB0eXBlDQoNCmBgYHtyfQ0Kc2tpbSh0cmFpbl9jYXRlZ29yaWNhbCkNCnNraW0odGVzdF9jYXRlZ29yaWNhbCkNCmBgYA0KTGV0J3MgZXhwbG9yZSBjb250aW51b3VzIHZhcmlhYmxlcw0KYGBge3J9DQpza2ltKHRyYWluX2NvbnRpbnVvdXMpDQpza2ltKHRlc3RfY29udGludW91cykNCmBgYA0KDQpMZXQncyBleHBsb3JlIGRpc2NyZXRlIHZhcmlhYmxlcw0KYGBge3J9DQpza2ltKHRyYWluX2Rpc2NyZXRlKQ0Kc2tpbSh0ZXN0X2Rpc2NyZXRlKQ0KYGBgDQoNCkZyb20gdGhlIGFib3ZlIHNraW0gc3RhdGVtZW50cywgd2Ugc2VlIHNvbWUgbWlzc2luZyBkYXRhLiBXaGF0IGlzIHRoZSBwcmVjZW50YWdlIG9mIG1pc3NpbmcgZGF0YSBpbiB0cmFpbmluZyBhbmQgdGVzdCBkYXRhc2V0cw0KYGBge3J9DQpzdW0oaXMubmEodHJhaW5pbmdEYXRhKSkgLyAobnJvdyh0cmFpbmluZ0RhdGEpICogbmNvbCh0cmFpbmluZ0RhdGEpKQ0Kc3VtKGlzLm5hKHRlc3REYXRhKSkgLyAobnJvdyh0ZXN0RGF0YSkgKiBuY29sKHRlc3REYXRhKSkNCg0KYXBwbHkodHJhaW5pbmdEYXRhLCAyLCBmdW5jdGlvbih4KXsNCiAgc3VtKGlzLm5hKHgpKQ0KfSkNCg0KYXBwbHkodGVzdERhdGEsIDIsIGZ1bmN0aW9uKHgpew0KICBzdW0oaXMubmEoeCkpDQp9KQ0KYGBgDQoNCg0KTm93IGxldCdzIGxvb2sgYXQgdGhlIHJlc3BvbnNlIHZhcmlhYmxlLiBUaGUgcmVzcG9u
c2UgdmFyaWFibGUgaXMgb2Ygbm9taW5hbCB0eXBlIGFuZCBoYXMgbGV2ZWxzIGZyb20gMSB0byA4Lg0KYGBge3J9DQp0ZW1wMSA9IGdncGxvdCh0cmFpbmluZ0RhdGEsIGFlcyh4ID0gUmVzcG9uc2UpKSArIGdlb21faGlzdG9ncmFtKGZpbGwgPSAnQmx1ZScsIGFscGhhPTAuNSkNCmdncGxvdGx5KHRlbXAxLCBjb2xvciA9IH5SZXNwb25zZSwgd2lkdGggPSA0MDAsIGhlaWdodCA9IDIwMCkgJT4lIGxheW91dCh0aXRsZT0gJ0Rpc3RyaWJ1dGlvbiBvZiBSZXNwb25zZSBWYXJpYWJsZScsIHBsb3RfYmdjb2xvcj0gJ3doaXRlJywgeGF4aXMgPSBsaXN0KGdyaWRjb2xvciA9ICdsaWdodGdyZXknLCBvcGFjaXR5ID0gMC4yKSwgeWF4aXMgPSBsaXN0KGdyaWRjb2xvciA9ICdsaWdodGdyZXknLCBvcGFjaXR5ID0gMC4yKSwgYXV0b3NpemUgPSBGLCB3aWR0aCA9IDQwMCwgaGVpZ2h0ID0gMjAwKQ0KYGBgDQoNCg0KQmFzZWQgb24gdGhlIGFib3ZlIHBsb3Qgd2Ugc2VlIHRoYXQgUmVzcG9uc2UgPSA4IGlzIG1vc3QgY29tbW9uIGFuZCBSZXNwb25zZSA9IDMgaXMgbGVhc3QgY29tbW9uIGluIHRoZSBkYXRhc2V0LiBOb3cgbGV0J3MgbG9vayBhdCBob3cgbWlzc2luZyB2YWx1ZXMgcmVsYXRlIHRvIHRoZXNlIHJlc3BvbnNlcw0KDQpgYGB7cn0NCm5vb2ZOQXBlclJlc3BvbnNlVHlwZSA8LSBzYXBwbHkoc29ydCh1bmlxdWUodHJhaW5pbmdEYXRhJFJlc3BvbnNlKSksIGZ1bmN0aW9uKHgpIHsNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBhcHBseSh0cmFpbmluZ0RhdGFbdHJhaW5pbmdEYXRhJFJlc3BvbnNlID09IHgsIF0sIDIsIGZ1bmN0aW9uKHkpIHsNCiAgICAgICAgICAgICAgc3VtKGlzLm5hKHkpKSANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9KSANCiAgICAgICAgICAgICAgICAgICAgICAgIH0pDQpub29mTkFwZXJSZXNwb25zZVR5cGUNCnJvdW5kKGNvbFN1bXMobm9vZk5BcGVyUmVzcG9uc2VUeXBlKSAvIHN1bShub29mTkFwZXJSZXNwb25zZVR5cGUpLCBkaWdpdHM9MikNCmBgYA0KRnJvbSB0aGlzIHdlIHNlZSB0aGF0IGRhdGEgd2l0aCBSZXNwb25zZSBhcyA4IGhhcyBtb3N0IG1pc3NpbmcgdmFsdWVzIHdoZXJlYXMgUmVzcG9uc2UgZXF1YWxzIDMgaGFzIGxlYXN0IG1pc3NpbmcgdmFsdWVzLiBOb3cgbGV0J3MgbG9vayBmb3IgRHVwbGljYXRlZCBhbmQgQ29uc3RhbnQgUm93cw0KDQpgYGB7cn0NCmNhdCgnTnVtYmVyIG9mIGR1cGxpY2F0ZWQgcm93cyBpbiBUcmFpbmluZyBkYXRhc2V0ID0gJywgbnJvdyh0cmFpbmluZ0RhdGEpIC0gbnJvdyh1bmlxdWUodHJhaW5pbmdEYXRhKSksICdcbicpDQpjYXQoJ051bWJlciBvZiBjb25zdGFudCByb3dzIGluIFRyYWluaW5nIGRhdGFzZXQgPSAnLCBzdW0oc2FwcGx5KHRyYWluaW5nRGF0YSwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmdW5jdGlvbih4KXsgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGxlbmd0aCh1bmlxdWUoeCkpID09IDF9KSksICdcbicpDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICNIb3cgdG8gdGVsbCBhIGRwbHlyIGZ1bmN0aW9uIHRvIGFwcGx5IGl0ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbHVtbndpc2UNCmNhdCgnTnVtYmVyIG9mIGR1cGxpY2F0ZWQgcm93cyBpbiBUZXN0IGRhdGFzZXQgPSAnLCBucm93KHRlc3REYXRhKSAtIG5yb3codW5pcXVlKHRlc3REYXRhKSksICdcbicpDQpjYXQoJ051bWJlciBvZiBjb25zdGFudCByb3dzIGluIFRlc3QgZGF0YXNldCA9ICcsIHN1bShzYXBwbHkodGVzdERhdGEsIGZ1bmN0aW9uKHgpeyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGVuZ3RoKHVuaXF1ZSh4KSkgPT0gMX0pKSwgJ1xuJykNCmBgYA0KDQoNCk5vdyB0aGF0IHdlIGhhdmUgbG9va2VkIGF0IG1pc3NpbmcgdmFsdWVzLCBjaGVja2VkIGZvciBkdXBsaWNhdGlvbiBhbmQgY29uc3RhbnQgcm93cy4gTGV0J3MgbG9vayBhdCBpbmRpdmlkdWFsIGRhdGEgdHlwZXMgYW5kIHZpc3VhbGl6ZSB0aGUgZGlzdHJpYnV0aW9uLg0KDQpQbG90IGRlbnNpdGllcyBvZiBjb250aW51b3VzIGZlYXR1cmVzDQoNCmBgYHtyfQ0KdGVtcERhdGFzZXQgPSB0cmFpbl9jb250aW51b3VzICU+JSBzZWxlY3QoYygxLCAyKSkgJT4lIGdhdGhlcigpDQp0ZW1wZGVuUGxvdCA9IGdncGxvdChkYXRhID0gdGVtcERhdGFzZXQpICsgZ2VvbV9kZW5zaXR5KG1hcHBpbmcgPSBhZXMoeCA9IHZhbHVlLCBmaWxsID0ga2V5LCBhbHBoYSA9IDAuNSkpICsgbGFicyh0aXRsZSA9ICdEZW5zaXR5IFBsb3RzJykNCmdncGxvdGx5KHRlbXBkZW5QbG90LCBoZWlnaHQ9IDYwMCwgd2lkdGggPSA4MDApICU+JSANCiAgICAgIGxheW91dChwbG90X2JnY29sb3I9J3RyYW5zcGFyZW50JywgcGFwZXJfYmdjb2xvcj0gJ3RyYW5zcGFyZW50JywgYXV0b3NpemUgPSBGLCB3aWR0aCA9IDgwMCwgaGVpZ2h0ID0gNjAwKQ0KDQpwaHlzaWNhbEF0dERhdGFzZXQgPSB0cmFpbl9jb250aW51b3VzICU+JSBzZWxlY3QoSHQsIFd0LCBCTUkpICU+JSBnYXRoZXIoKQ0KdGVtcGRlblBsb3QgPSBnZ3Bsb3QoZGF0YSA9IHBoeXNpY2FsQXR0RGF0YXNldCkgKyBnZW9tX2RlbnNpdHkobWFwcGluZyA9IGFlcyh4ID0gdmFsdWUsIGZpbGwgPSBrZXksIGFscGhhID0gMC41KSkgKyBsYWJzKHRpdGxlID0gJ0RlbnNpdHkgUGxvdHMgb2YgUGh5c2ljYWwgQ2hhcmFjdGVyaXN0aWNzJykNCmdncGxvdGx5KHRlbXBkZW5QbG90LCBoZWlnaHQ9IDYwMCwgd2lkdGggPSA4MDApICU+JSANCiAgICAgIGxheW91dChwbG90X2JnY29sb3I9J3RyYW5zcGFyZW50JywgcGFwZXJfYmdjb2xvcj0gJ3RyYW5zcGFyZW50JywgYXV0b3NpemUg
PSBGLCB3aWR0aCA9IDgwMCwgaGVpZ2h0ID0gNjAwKQ0KDQplbXBsb3lBdHREYXRhc2V0ID0gdHJhaW5fY29udGludW91cyAlPiUgc2VsZWN0KEVtcGxveW1lbnRfSW5mb18xLCBFbXBsb3ltZW50X0luZm9fNikgJT4lIGdhdGhlcigpDQp0ZW1wZGVuUGxvdCA9IGdncGxvdChkYXRhID0gZW1wbG95QXR0RGF0YXNldCkgKyBnZW9tX2RlbnNpdHkobWFwcGluZyA9IGFlcyh4ID0gdmFsdWUsIGZpbGwgPSBrZXksIGFscGhhID0gMC41KSkgKyBsYWJzKHRpdGxlID0gJ0RlbnNpdHkgUGxvdHMgb2YgRW1wbG95bWVudCBJbmZvIDEgYW5kIDYnKQ0KZ2dwbG90bHkodGVtcGRlblBsb3QsIGhlaWdodD0gNjAwLCB3aWR0aCA9IDgwMCkgJT4lIA0KICAgICAgbGF5b3V0KHBsb3RfYmdjb2xvcj0ndHJhbnNwYXJlbnQnLCBwYXBlcl9iZ2NvbG9yPSAndHJhbnNwYXJlbnQnLCBhdXRvc2l6ZSA9IEYsIHdpZHRoID0gODAwLCBoZWlnaHQgPSA2MDApDQoNCmVtcGxveURhdGFzZXQgPSB0cmFpbl9jb250aW51b3VzICU+JSBzZWxlY3QoRW1wbG95bWVudF9JbmZvXzQpICU+JSBnYXRoZXIoKQ0KdGVtcGRlblBsb3QgPSBnZ3Bsb3QoZGF0YSA9IGVtcGxveURhdGFzZXQpICsgZ2VvbV9kZW5zaXR5KG1hcHBpbmcgPSBhZXMoeCA9IHZhbHVlLCBmaWxsID0ga2V5LCBhbHBoYSA9IDAuNSkpICsgbGFicyh0aXRsZSA9ICdEZW5zaXR5IFBsb3RzIG9mIEVtcGxveW1lbnQgSW5mbyA0JykNCmdncGxvdGx5KHRlbXBkZW5QbG90LCBoZWlnaHQ9IDYwMCwgd2lkdGggPSA4MDApICU+JSANCiAgICAgIGxheW91dChwbG90X2JnY29sb3I9J3RyYW5zcGFyZW50JywgcGFwZXJfYmdjb2xvcj0gJ3RyYW5zcGFyZW50JywgYXV0b3NpemUgPSBGLCB3aWR0aCA9IDgwMCwgaGVpZ2h0ID0gNjAwKQ0KDQpmYW1pbHlIaXN0RGF0YXNldCA9IHRyYWluX2NvbnRpbnVvdXMgJT4lIHNlbGVjdChzdGFydHNfd2l0aCgnRmFtaWx5X0hpc3RfJykpICU+JSBnYXRoZXIoKQ0KdGVtcGRlblBsb3QgPSBnZ3Bsb3QoZGF0YSA9IGZhbWlseUhpc3REYXRhc2V0KSArIGdlb21fZGVuc2l0eShtYXBwaW5nID0gYWVzKHggPSB2YWx1ZSwgZmlsbCA9IGtleSwgYWxwaGEgPSAwLjUpKSArIGxhYnModGl0bGUgPSAnRGVuc2l0eSBQbG90cyBvZiBGYW1pbHkgSGlzdG9yeScpDQpnZ3Bsb3RseSh0ZW1wZGVuUGxvdCwgaGVpZ2h0PSA2MDAsIHdpZHRoID0gODAwKSAlPiUgDQogICAgICBsYXlvdXQocGxvdF9iZ2NvbG9yPSd0cmFuc3BhcmVudCcsIHBhcGVyX2JnY29sb3I9ICd0cmFuc3BhcmVudCcsIGF1dG9zaXplID0gRiwgd2lkdGggPSA4MDAsIGhlaWdodCA9IDYwMCkNCg0KaW5zdXJIaXN0RGF0YXNldCA9IHRyYWluX2NvbnRpbnVvdXMgJT4lIHNlbGVjdChjKDkpKSAlPiUgZ2F0aGVyKCkNCnRlbXBkZW5QbG90ID0gZ2dwbG90KGRhdGEgPSBpbnN1ckhpc3REYXRhc2V0KSArIGdlb21fZGVuc2l0eShtYXBwaW5nID0gYWVzKHggPSB2YWx1ZSwgZmlsbCA9IGtleSwgYWxwaGEgPSAwLjUpKSArIGxhYnModGl0bGUg
PSAnRGVuc2l0eSBQbG90cycpDQpnZ3Bsb3RseSh0ZW1wZGVuUGxvdCwgaGVpZ2h0PSA4MDAsIHdpZHRoID0gMTAwMCkgJT4lIA0KICAgICAgbGF5b3V0KHBsb3RfYmdjb2xvcj0ndHJhbnNwYXJlbnQnLCBwYXBlcl9iZ2NvbG9yPSAndHJhbnNwYXJlbnQnLCBhdXRvc2l6ZSA9IEYsIHdpZHRoID0gODAwLCBoZWlnaHQgPSA4MDApDQoNCmBgYA0KUGxvdCBCb3hwbG90cyBvZiBDb250aW51b3VzIFZhcmlhYmxlcw0KYGBge3J9DQp0ZW1wRGF0YXNldCA9IHRyYWluX2NvbnRpbnVvdXMgJT4lIHNlbGVjdChjKDEsIDIsIDkpKSAlPiUgZ2F0aGVyKCkNCnBsb3RfbHkoZGF0YSA9IHRlbXBEYXRhc2V0LCB0eXBlID0gJ2JveCcsIHNwbGl0ID0gfmtleSwgeSA9IH52YWx1ZSkgJT4lIGxheW91dCh0aXRsZSA9ICdCb3ggUGxvdHMnKQ0KcGxvdF9seShkYXRhID0gcGh5c2ljYWxBdHREYXRhc2V0LCB0eXBlID0gJ2JveCcsIHNwbGl0ID0gfmtleSwgeSA9IH52YWx1ZSkgJT4lIGxheW91dCh0aXRsZSA9ICdCb3ggUGxvdHMgb2YgUGh5c2ljYWwgQ2hhcmFjdGVyaXN0aWNzJykNCnBsb3RfbHkoZGF0YSA9IGZhbWlseUhpc3REYXRhc2V0LCB0eXBlID0gJ2JveCcsIHNwbGl0ID0gfmtleSwgeSA9IH52YWx1ZSkgJT4lIGxheW91dCh0aXRsZSA9ICdCb3ggUGxvdHMgb2YgRmFtaWx5IEhpc3RvcnkgQXR0cmlidXRlcycpDQplbXBsb3lJbmZvRGF0YSA9IHRyYWluX2NvbnRpbnVvdXMgJT4lIHNlbGVjdChzdGFydHNfd2l0aCgnRW1wbG95bWVudF9JbmZvXycpKSAlPiUgZ2F0aGVyKCkNCnBsb3RfbHkoZGF0YSA9IGVtcGxveUluZm9EYXRhLCB0eXBlID0gJ2JveCcsIHNwbGl0ID0gfmtleSwgeSA9IH52YWx1ZSkgJT4lIGxheW91dCh0aXRsZSA9ICdCb3ggUGxvdHMgb2YgRW1wbG95bWVudCBJbmZvcm1hdGlvbiBBdHRyaWJ1dGVzJykNCmBgYA0KSGlzdG9ncmFtcyBvZiBDYXRlZ29yaWNhbCBWYXJpYWJsZXMNCmBgYHtyfQ0Kc3ViU2VsZWN0aW9uVmFscyA9IGMoJ1Byb2R1Y3RfSW5mbycsICdFbXBsb3ltZW50X0luZm8nLCAnSW5zdXJlZEluZm8nLCAnSW5zdXJhbmNlX0hpc3RvcnknLCAnRmFtaWx5X0hpc3QnLCAnTWVkaWNhbF9IaXN0b3J5JykNCmZvcihpIGluIHN1YlNlbGVjdGlvblZhbHMpIHsNCiAgdGVtcENhdERhdGFzZXQgPSB0cmFpbl9jYXRlZ29yaWNhbCAlPiUgc2VsZWN0KHN0YXJ0c193aXRoKGkpKQ0KICB0ZW1wY29sTmFtZXMgPSBuYW1lcyh0ZW1wQ2F0RGF0YXNldCkNCiAgIGZvcihqIGluIHRlbXBjb2xOYW1lcyl7DQogICAgaWYoaiA9PSAnUHJvZHVjdF9JbmZvXzInIHwgaiA9PSAnTWVkaWNhbF9IaXN0b3J5XzEwJykgbmV4dA0KICAgIHRlbXBDYXRQbG90ID0gZ2dwbG90KGRhdGEgPSB0cmFpbl9jYXRlZ29yaWNhbCkgKyBnZW9tX2hpc3RvZ3JhbShtYXBwaW5nID0gYWVzX3N0cmluZyh4ID0gaiksIGZpbGwgPSAnQmx1ZScsIGFscGhhID0gMC41KQ0KICAgIHRlbXBDYXRQbG90ID0gZ2dwbG90bHkodGVtcENhdFBsb3QsIGNvbG9yID0gfmosIHdp
ZHRoID0gNDAwLCBoZWlnaHQgPSAyMDApICU+JSBsYXlvdXQodGl0bGU9ICdEaXN0cmlidXRpb24gb2YgQ2F0ZWdvcmljYWwgVmFyaWFibGUnLCBwbG90X2JnY29sb3I9ICd3aGl0ZScsIHhheGlzID0gbGlzdChncmlkY29sb3IgPSAnbGlnaHRncmV5Jywgb3BhY2l0eSA9IDAuMiksIHlheGlzID0gbGlzdChncmlkY29sb3IgPSAnbGlnaHRncmV5Jywgb3BhY2l0eSA9IDAuMiksIGF1dG9zaXplID0gRiwgd2lkdGggPSA0MDAsIGhlaWdodCA9IDIwMCkNCiAgICBwcmludCh0ZW1wQ2F0UGxvdCkNCiAgfQ0KfQ0KYGBgDQpGcm9tIHRoZSBkZW5zaXR5IHBsb3RzIGl0IGxvb2tzIGxpa2UgRW1wbG95bWVudF9JbmZvXzYgaGFzIGEgdmVyeSBzaW1pbGFyIGhpc3RvZ3JhbSB0byB0aGUgcmVzcG9uc2UgdmFyaWFibGUuIEl0cyBhIGdvb2QgaW5kaWNhdGlvbiB0aGF0IHRoaXMgdmFyaWFibGUgbWF5YmUgYSBnb29kIHByZWRpY3RvciBvZiB0aGUgcmVzcG9uc2UgdmFyaWFibGUuDQoNCk5vdyBsZXQncyBleHBsb3JlIHRvIHNlZSB0aGUgcmVsYXRpb25zaGlwIGFuZCBhbnkgY29sbGluZWFyaXR5IHRoYXQgZXhpc3QgYmV0d2VlbiB0aGUgdmFyaWFibGVzLiBGaXJzdCBsZXRzIHBsb3QgYSBjb3JyZWxhdGlvbiBtYXRyaXggb2YgYWxsIGNvbnRpbnVvdXMgYW5kIGNhdGVnb3JpY2FsIHZhcmlhYmxlcyB3aXRoIHJlc3BvbnNlDQoNCmBgYHtyfQ0KY29ySW5wdXRWYXJOYW1lcyA9IGMoY2F0ZWdvcmljYWxWYXJOYW1lcywgY29udGludW91c1Zhck5hbWVzKQ0KY29ySW5wdXREYXRhID0gdHJhaW5pbmdEYXRhICU+JSBzZWxlY3QoY29ySW5wdXRWYXJOYW1lcykgJT4lIHNlbGVjdCgtUHJvZHVjdF9JbmZvXzIpDQpjb3IxID0gY29yKGNvcklucHV0RGF0YSkNCmNvckRhdGFsb25nZm9ybSA9IG1lbHQoY29yMSkNCmdncGxvdChkYXRhID0gY29yRGF0YWxvbmdmb3JtLCBtYXBwaW5nID0gYWVzKHggPSBWYXIxLCB5ID0gVmFyMiwgZmlsbCA9IHZhbHVlKSkgKyBnZW9tX3RpbGUoY29sb3IgPSAnd2hpdGUnKSArIHNjYWxlX2ZpbGxfZ3JhZGllbnQyKGxvdyA9ICdibHVlJywgaGlnaCA9ICdyZWQnLCBtaWQgPSAnd2hpdGUnLCBtaWRwb2ludCA9IDAsIGxpbWl0ID0gYygtMSwxKSkgKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCB2anVzdCA9IDEsIGhqdXN0ID0gMSkpICsgZ2d0aXRsZSAoJ1BlYXJzb24gQ29ycmVsYXRpb24gTWF0cml4IGV4Y2wuIERpc2NyZXRlIFZhcmlhYmxlcyAoTWVkaWNhbCBLZXl3b3JkcyknKQ0KDQpgYGANCg0KRnJvbSB0aGlzIHdlIHNlZSBzb21lIGludGVyZXN0aW5nIG9ic2VydmF0aW9uczoNCmkuIE1lZGljYV9IaXN0b3J5XzI1IGFuZCBNZWRpY2FsX0hpc3RvcnlfMjYgc2hvdyBuZWdhdGl2ZSBjb3JyZWxhdGlvbi4NCmlpLiBWYXJpYWJsZSA3IGlzIHN0cm9uZ2x5IGNvcnJlbGF0ZWQgd2l0aCB2YXJpYWJsZXMgMTggYW5kIDE0Lg0KDQpOb3cgbGV0J3MgbG9vayBhdCBkZXRhaWwgYnkgc3Vic2V0dGlu
ZyB0aGUgZGF0YSB0byBpZGVudGlmeSBjb3JyZWxhdGlvbiB0aGF0IGV4aXN0IGJldHdlZW4gYmlucyBvZiBpbnB1dCBkYXRhIGllIHR5cGVzIGxpa2UgTWVkaWNhbCBIaXN0b3J5LCBQcm9kdWN0IEluZm8gZXRjLg0KDQpgYGB7cn0NCm1lZEhpc3REYXRhID0gdHJhaW5pbmdEYXRhICU+JSBzZWxlY3Qoc3RhcnRzX3dpdGgoJ01lZGljYWxfSGlzdG9yeScpLCBSZXNwb25zZSwgLWMoTWVkaWNhbF9IaXN0b3J5XzEwLCBNZWRpY2FsX0hpc3RvcnlfMjQsIE1lZGljYWxfSGlzdG9yeV8zMikpDQpjb3JtZWRIaXN0ID0gY29yKG1lZEhpc3REYXRhKQ0KY29yTWVkSGlzdGxvbmdmb3JtID0gbWVsdChjb3JtZWRIaXN0KQ0KZ2dwbG90KGRhdGEgPSBjb3JNZWRIaXN0bG9uZ2Zvcm0sIG1hcHBpbmcgPSBhZXMoeCA9IFZhcjEsIHkgPSBWYXIyLCBmaWxsID0gdmFsdWUpKSArIGdlb21fdGlsZShjb2xvciA9ICd3aGl0ZScpICsgc2NhbGVfZmlsbF9ncmFkaWVudDIobG93ID0gJ2JsdWUnLCBoaWdoID0gJ3JlZCcsIG1pZCA9ICd3aGl0ZScsIG1pZHBvaW50ID0gMCwgbGltaXQgPSBjKC0xLDEpKSArIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIHZqdXN0ID0gMSwgaGp1c3QgPSAxKSkgKyBnZ3RpdGxlICgnUGVhcnNvbiBDb3JyZWxhdGlvbiBNYXRyaXggZm9yIE1lZGljYWwgSGlzdG9yeSBWYXJpYWJsZXMnKQ0KDQpgYGANCg0KRnJvbSB0aGlzIGdyYXBoIHdlIGNhbiBzZWUgdGhhdDoNCmkuIE1lZGljYWxfSGlzdG9yeV8yNSBhbmQgTWVkaWNhbF9IaXN0b3J5XzI2IHNob3cgbmVnYXRpdmUgY29ycmVsYXRpb24NCmlpLiBNZWRpY2FsX0hpc3RvcnlfMzYgaXMgcG9zdGl2ZWx5IGNvcnJlbGF0ZWQgdG8gTWVkaWNhbF9IaXN0b3J5XzE1IGFuZCBuZWdhdGl2ZWx5IGNvcnJlbGF0ZWQgdG8gTWVkaWNhbF9IaXN0b3J5XzI2DQppaWkuIE1lZGljYWxfSGlzdG9yeV83IGlzIHN0b3JnbHkgY29ycmVsYXRlZCB3aXRoIE1lZGljYWxfSGlzdG9yeV84IGFuZCBNZWRpY2FsX0hpc3RvcnlfMTQNCg0KYGBge3J9DQpwaHlzaWNhbERhdGEgPSB0cmFpbmluZ0RhdGEgJT4lIHNlbGVjdChJbnNfQWdlLCBCTUksIEh0LCBXdCwgUmVzcG9uc2UpDQpjb3JQaHlEYXRhID0gY29yKHBoeXNpY2FsRGF0YSkNCmNvclBoeURhdGFsb25nRm9ybSA9IG1lbHQoY29yUGh5RGF0YSkNCmdncGxvdChkYXRhID0gY29yUGh5RGF0YWxvbmdGb3JtLCBtYXBwaW5nID0gYWVzKHggPSBWYXIxLCB5ID0gVmFyMiwgZmlsbCA9IHZhbHVlKSkgKyBnZW9tX3RpbGUoY29sb3IgPSAnd2hpdGUnKSArIHNjYWxlX2ZpbGxfZ3JhZGllbnQyKGxvdyA9ICdibHVlJywgaGlnaCA9ICdyZWQnLCBtaWQgPSAnd2hpdGUnLCBtaWRwb2ludCA9IDAsIGxpbWl0ID0gYygtMSwxKSkgKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCB2anVzdCA9IDEsIGhqdXN0ID0gMSkpICsgZ2d0aXRsZSAoJ1BlYXJzb24gQ29ycmVsYXRpb24gTWF0cml4IGZvciBQ
aHlzaWNhbCBBdHRyaWJ1dGVzJykNCg0KYGBgDQpGcm9tIHRoaXMgZ3JhcGggaXRzIGludGVyZXN0aW5nIHRvIHNlZSB0aGF0IFJlc3BvbnNlIGlzIG5lZ2F0aXZlbHkgY29ycmVsYXRlZCB3aXRoIFd0L0JNSSBhbmQgSW5zX0FnZS4NCg0KYGBge3J9DQppbnNEYXRhID0gdHJhaW5pbmdEYXRhICU+JSBzZWxlY3Qoc3RhcnRzX3dpdGgoJ0luc3VyYW5jZV9IaXN0b3J5JyksIFJlc3BvbnNlKQ0KY29yaW5zRGF0YSA9IGNvcihpbnNEYXRhKQ0KY29yaW5zRGF0YWxvbmdmb3JtID0gbWVsdChjb3JpbnNEYXRhKQ0KZ2dwbG90KGRhdGEgPSBjb3JpbnNEYXRhbG9uZ2Zvcm0sIG1hcHBpbmcgPSBhZXMoeCA9IFZhcjEsIHkgPSBWYXIyLCBmaWxsID0gdmFsdWUpKSArIGdlb21fdGlsZShjb2xvciA9ICd3aGl0ZScpICsgc2NhbGVfZmlsbF9ncmFkaWVudDIobG93ID0gJ2JsdWUnLCBoaWdoID0gJ3JlZCcsIG1pZCA9ICd3aGl0ZScsIG1pZHBvaW50ID0gMCwgbGltaXQgPSBjKC0xLDEpKSArIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIHZqdXN0ID0gMSwgaGp1c3QgPSAxKSkgKyBnZ3RpdGxlICgnUGVhcnNvbiBDb3JyZWxhdGlvbiBNYXRyaXggZm9yIEluc3VyYW5jZSBIaXN0b3J5IEF0dHJpYnV0ZXMnKQ0KYGBgDQpGcm9tIHRoaXMgZ3JhcGggaXRzIGludGVyZXN0aW5nIHRvIHNlZSB0aGF0IFJlc3BvbnNlIGlzIHNsaWdodGx5IG5lZ2F0aXZlbHkgY29ycmVsYXRlZCB0byBJbnN1cmFuY2VfSGlzdG9yeV8yIHZhcmlhYmxlLiBJbnN1cmFuY2VfSGlzdG9yeV8zIGlzIG5lZ2F0aXZlbHkgY29ycmVsYXRlZCB0byA0LCA3LCA5LiA0IGlzIHBvc2l0aXZlbHkgY29ycmVsYXRlZCB0byA3IGFuZCA5LCAxLiA3IGlzIHBvc2l0aXZlbHkgY29ycmVsYXRlZCA0LCAxLCA5Lg0KDQoNCg0KYGBge3J9DQpwcm9kdWN0RGF0YSA9IHRyYWluaW5nRGF0YSAlPiUgc2VsZWN0KHN0YXJ0c193aXRoKCdQcm9kdWN0X0luZm8nKSwgUmVzcG9uc2UsIC1Qcm9kdWN0X0luZm9fMikNCmNvclByb2R1Y3REYXRhID0gY29yKHByb2R1Y3REYXRhKQ0KY29yUHJvZHVjdERhdGFsb25nZm9ybSA9IG1lbHQoY29yUHJvZHVjdERhdGEpDQpnZ3Bsb3QoZGF0YSA9IGNvclByb2R1Y3REYXRhbG9uZ2Zvcm0sIG1hcHBpbmcgPSBhZXMoeCA9IFZhcjEsIHkgPSBWYXIyLCBmaWxsID0gdmFsdWUpKSArIGdlb21fdGlsZShjb2xvciA9ICd3aGl0ZScpICsgc2NhbGVfZmlsbF9ncmFkaWVudDIobG93ID0gJ2JsdWUnLCBoaWdoID0gJ3JlZCcsIG1pZCA9ICd3aGl0ZScsIG1pZHBvaW50ID0gMCwgbGltaXQgPSBjKC0xLDEpKSArIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIHZqdXN0ID0gMSwgaGp1c3QgPSAxKSkgKyBnZ3RpdGxlICgnUGVhcnNvbiBDb3JyZWxhdGlvbiBNYXRyaXggZm9yIFByb2R1Y3QgSW5mb3JtYXRpb24gQXR0cmlidXRlcycpDQpgYGANCg0KQWZ0ZXIgcGxvdHRpbmcgdGhlIGNvcnJlbGF0aW9uIG1hdHJpeCBm
b3IgYWxsIHZhcmlhYmxlcyBhbmQgdGhlaXIgc3VidHlwZXMsIHdlIG5vdyBtb3ZlIG9uIHRvIGZpdHRpbmcgbW9kZWxzLiBGaXJzdCB3ZSBuZWVkIHRvIGhhbmRsZSBtaXNzaW5nIHZhbHVlcy4gRm9yIHRoaXMgYW5hbHlzaXMgd2UgZHJvcCBjb2x1bW5zIHdoaWNoIGhhdmUgbWlzc2luZyB2YWx1ZXMuDQoNCmBgYHtyfQ0KdXBkYXRlZFRyYWluaW5nRGF0YSA9IHRyYWluaW5nRGF0YVssIGNvbFN1bXMoaXMubmEodHJhaW5pbmdEYXRhKSkgPT0gMF0NCnVwZGF0ZWRUZXN0RGF0YSA9IHRlc3REYXRhWywgY29sU3Vtcyhpcy5uYSh0ZXN0RGF0YSkpID09IDBdDQoNCmZlYXR1cmVOYW1lcyA9IG5hbWVzKHVwZGF0ZWRUcmFpbmluZ0RhdGEpWzIgOiAobmNvbCh1cGRhdGVkVHJhaW5pbmdEYXRhKSAtIDEpXQ0KDQpmb3IoayBpbiBmZWF0dXJlTmFtZXMpew0KICBpZihjbGFzcyh1cGRhdGVkVHJhaW5pbmdEYXRhW1trXV0pID09ICdjaGFyYWN0ZXInKXsNCiAgIGxldmVsID0gdW5pcXVlKGModXBkYXRlZFRyYWluaW5nRGF0YVtba11dLCB1cGRhdGVkVGVzdERhdGFbW2tdXSkpDQogICB1cGRhdGVkVHJhaW5pbmdEYXRhW1trXV0gPSBhcy5pbnRlZ2VyKGZhY3Rvcih1cGRhdGVkVHJhaW5pbmdEYXRhW1trXV0sIGxldmVscyA9IGxldmVsKSkNCiAgIHVwZGF0ZWRUZXN0RGF0YVtba11dID0gYXMuaW50ZWdlcihmYWN0b3IodXBkYXRlZFRlc3REYXRhW1trXV0sIGxldmVscyA9IGxldmVsKSkgDQogIH0NCn0NCmBgYA0KTm93IHRoYXQgdGhlIGRhdGEgaXMgY2xlYW5lZCwgbGV0cyBmaXQgZmV3IG1vZGVscw0KYGBge3J9DQp4Z2JNb2RlbCA9IHhnYm9vc3QoZGF0YSA9IGRhdGEubWF0cml4KHVwZGF0ZWRUcmFpbmluZ0RhdGFbLCBmZWF0dXJlTmFtZXNdKSwgbGFiZWwgPSB1cGRhdGVkVHJhaW5pbmdEYXRhJFJlc3BvbnNlLCBldGEgPSAwLjAyNSwgZGVwdGggPSAxMCwgbnJvdW5kcyA9IDEwMCwgb2JqZWN0aXZlID0gJ3JlZzpsaW5lYXInLCBldmFsX21ldHJpYyA9ICdybXNlJykNCmltcE1hdHJpeCA9IHhnYi5pbXBvcnRhbmNlKG1vZGVsID0geGdiTW9kZWwpDQppbXBNYXRyaXh0b3AxMCA9IGltcE1hdHJpeFtjKDE6MTApLCBdDQp4Z2IucGxvdC5pbXBvcnRhbmNlKGltcG9ydGFuY2VfbWF0cml4ID0gaW1wTWF0cml4dG9wMTApDQoNCmltcE1hdHJpeHRvcDMwID0gaW1wTWF0cml4W2MoMTozMCksIF0NCnhnYi5wbG90LmltcG9ydGFuY2UoaW1wb3J0YW5jZV9tYXRyaXggPSBpbXBNYXRyaXh0b3AzMCkNCmBgYA0KDQpgYGB7cn0NCnN1Ym1pc3Npb25EYXRhID0gZGF0YS5mcmFtZShJZCA9IHVwZGF0ZWRUZXN0RGF0YSRJZCkNCnN1Ym1pc3Npb25EYXRhJFJlc3BvbnNlID0gcm91bmQocHJlZGljdCh4Z2JNb2RlbCwgZGF0YS5tYXRyaXgodXBkYXRlZFRlc3REYXRhWywgZmVhdHVyZU5hbWVzXSkpKQ0KYGBgDQpRdWVzdGlvbnM6DQoxLiBBIHJlZmluZWQgc3RhdGVtZW50IG9mIHRoZSBtb2RlbCdzIHB1cnBvc2UgYW5kIGFwcGxpY2F0aW9uIGZyb20gcHJvamVj
dCBDb21wb25lbnQgMS4NCiAgVGhlIG1haW4gcHVycG9zZSBvZiBtb2RlbCBkZXZlbG9wbWVudCB3YXMgdG8gbG9vayBmb3IgcGFyYW1ldGVycyB0aGF0IGhhdmUgYSBzdHJvbmcgaW5mbHVlbmNlIG9uIHRoZSBmaW5hbCBkZWNpc2lvbiBhc3NvY2lhdGVkIHdpdGggYW4gbGlmZSBpbnN1cmFuY2UgYXBwbGljYXRpb24uDQogIA0KMi4gQSByZWZpbmVkIHN0YXRlbWVudCBvZiB0aGUgZGF0YSB1c2VkIGluIHRoZSBtb2RlbCwgYXMgd2VsbCBhcyB0cmFuc2Zvcm1hdGlvbnMgYW5kIGFic3RyYWN0aW9ucyB5b3UgcGVyZm9ybWVkLg0KICBJbiBvcmRlciB0byBhY2hlaXZlIHRoaXMsIGFuIEV4dHJlbWUgR3JhZGllbnQgQm9vc3QgbW9kZWwgd2FzIGZpdCBvbiB0aGUgdHJhaW5pbmcgZGF0YXNldC4gQmVmb3JlIFhHQm9vc3QgbW9kZWwgd2FzIGZpdCBvbiB0aGUgZGF0YXNldCwgdGhlIGRhdGEgd2FzIHNjcnViYmVkIHRvIGRpc2NhcmQgdmFyaWFibGVzIChjb2x1bW5zKSB3aXRoIE5VTEwgdmFsdWVzLiBGdXJ0aGVyIHZhcmlhYmxlcyBvZiBjaGFyYWN0ZXIgdHlwZSAoUHJvZHVjdF9JbmZvXzIpIHdhcyBjb2RlZCB3aXRoIE51bWVyaWMgSUQncyBiYXNlZCBvbiB0aGUgYXNzdW1wdGlvbiB0aGF0IHRoZXkgYXJlIGNhdGVnb3JpY2FsLiBJbiBhZGRpdGlvbiB0byB0aGlzIGR1cmluZyB0aGUgZXhwbG9yYXRvcnkgZGF0YSBhbmFseXNpcyBwaGFzZSBzZXZlcmFsIHRyYW5zZm9ybWF0aW9uL2Fic3RyYWN0aW9uIHRlY2huaXF1ZSB3YXMgdXNlZCAoZ2F0aGVyLCBjYWxjdWxhdGUgcGVyY2VudCBOVUxMIHZhbHVlcyBmb3IgZWFjaCByZXNwb25zZSB0eXBlIGV0Yy4pDQogIA0KMy4gQSByZWZpbmVkIHN0YXRlbWVudCBhYm91dCBpdHMgdmFsaWRpdHkgYW5kIGdlbmVyYWxpemFiaWxpdHkuDQogIFRoZSBwYXJhbWV0ZXJzIHNlbGVjdGVkIGJ5IHRoZSBYR0Jvb3N0IG1vZGVsIGFzIGhhdmluZyBhIHN0cm9uZyBpbmZsdWVuY2Ugb24gdGhlIGRlY2lzaW9uIG9mIGEgbGlmZSBpbnN1cmFuY2UgYXBwbGljYXRpb24gbWFrZXMgc2Vuc2UuIFdlIGNhbiBzZWUgUGh5c2ljYWwgYXR0cmlidXRlcyAoQk1JLCBBZ2UpIGFuZCBNZWRpY2FsIEhpc3RvcnkgaGF2ZSB0aGUgc3Ryb25nZXN0IGluZmx1ZW5jZS4gSW4gdGVybXMgb2YgZ2VuZXJhbGl6YWJpbGl0eSBpdCBzZWVtcyB0byBtYWtlIHNlbnNlIHRoYXQgTWVkaWNhbCBiYWNrZ3JvdW5kIGFuZCBQaHlzaWNhbCBhdHRyaWJ1dGVzIG9mIGEgcGVyc29uIGhhdmUgc3Ryb25nIGluZmx1ZW5jZSBvbiBsaWZlIGluc3VyYW5jZSBhcHBsaWNhdGlvbiBkZWNpc2lvbi4NCiAgDQo0LiBBIHJlZmluZWQgZ3JhcGhpY2FsIGRlcGljdGlvbiBvZiB0aGUgbW9zdCByZXZlYWxpbmcgYW5kIGludGVyZXN0aW5nIGVsZW1lbnQgb2YgdGhlIG1vZGVsLg0KICBXaGVuIHdlIGV4cGFuZCBvdXIgc2VsZWN0aW9uIG9mIHZhcmlhYmxlIGltcG9ydGFuY2UgdG8gMzAgdG9wIHZhcmlhYmxlcyB0aGF0IGluZmx1ZW5jZSBMaWZlIGluc3VyYW5j
ZSBkZWNpc2lvbiwgaXQgd2FzIGludGVyZXN0aW5nIHRvIHNlZSB0aGF0IG1vc3Qgb2YgdGhlIGFwcGxpY2FudCBlbXBsb3ltZW50IGluZm9ybWF0aW9uIGhhcmRseSBtYXR0ZXJlZC4gSW5mYWN0LCBvbmx5IG9uZSBvZiB0aGUgZW1wbG95bWVudCBwYXJhbWV0ZXJzIG1hZGUgaXQgdG8gdGhlIHRvcCAzMCBsaXN0Lg==